Exercise 1

After the first exercises I feel enthusiastic and I am eager to learn more about open data science and R. I hope I will learn the basics of coding and that I can create a platform I can use in my future research projects. My GitHub repository link: https://github.com/iinatuomainen/IODS-project


#Exercise 2

Describe the work you have done this week and summarize your learning.

Let’s plot the data

# Read the learning2014 data set from the project data folder.
learning2014 <- read.csv(file="~/IODS-project/data/learning2014")
# Scatterplot matrix of every column except the first, with points colored by gender.
pairs(learning2014[-1], col = learning2014$gender)
library(GGally)
## Loading required package: ggplot2
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

# Pairs plot of all columns, colored by gender; the lower-triangle "combo"
# panels are drawn as faceted histograms with 20 bins.
ggpairs(learning2014, mapping = aes(col=gender, alpha = 0.3), lower=list(combo=wrap("facethist", bins=20)))

# Simple linear regression of points on attitude.
my_model2 <- lm (points ~ attitude, data = learning2014)
# 2 x 2 plotting grid so the diagnostic plots share one figure.
par(mfrow = c(2,2))
# Diagnostic plots: residuals vs fitted (1), normal Q-Q (2), residuals vs leverage (5).
plot(my_model2, which = c(1,2,5))


#Read both data sets into R and explore their structures and dimensions

# Load the two student data files from the project data folder
MAT <- read.csv("~/IODS-project/data/mat")
POR <- read.csv("~/IODS-project/data/por")
# Number of rows and columns in each
dim(MAT)
## [1] 395  34
dim(POR)
## [1] 649  34

#Join two data sets using variables “school”, “sex”, “age”, “address”, “famsize”, “Pstatus”, “Medu”, “Fedu”, “Mjob”, “Fjob”, “reason”, “nursery”,“internet”

# Identifier columns shared by both data sets; these are the join keys
join_by <- c(
  "school", "sex", "age", "address", "famsize", "Pstatus", "Medu", "Fedu",
  "Mjob", "Fjob", "reason", "nursery", "internet"
)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:GGally':
## 
##     nasa
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Keep only the rows whose keys occur in both data sets; non-key columns
# that exist in both get a suffix naming their source
math_por <- MAT %>%
  inner_join(POR, by = join_by, suffix = c(".MAT", ".POR"))

#The if-else structure (copied from the DataCamp exercise)

# Start the combined data set from the join-key columns only
alc <- math_por %>% select(one_of(join_by))
# Names of the columns in MAT that were not used as join keys
mat_columns <- colnames(MAT)
notjoined_columns <- mat_columns[!(mat_columns %in% join_by)]
notjoined_columns
##  [1] "X"          "guardian"   "traveltime" "studytime"  "failures"  
##  [6] "schoolsup"  "famsup"     "paid"       "activities" "higher"    
## [11] "romantic"   "famrel"     "freetime"   "goout"      "Dalc"      
## [16] "Walc"       "health"     "absences"   "G1"         "G2"        
## [21] "G3"
# For each column that was not a join key, collapse its two copies
# (one per source data set) into a single column of 'alc'.
for (column_name in notjoined_columns) {
  # the columns of 'math_por' whose names begin with this column name
  both_versions <- select(math_por, starts_with(column_name))
  leading <- both_versions[[1]]
  # numeric column: rounded row-wise mean of the two versions;
  # anything else: keep the first version unchanged
  alc[column_name] <- if (is.numeric(leading)) {
    round(rowMeans(both_versions))
  } else {
    leading
  }
}

#Average of the answers related to weekday and weekend alcohol consumption to create a new column “alc_use”. Create “high_use”, where TRUE is for students for which “alc_use” is greater than 2, otherwise FALSE

# alc_use: mean of weekday (Dalc) and weekend (Walc) consumption;
# high_use: TRUE when that mean is above 2
alc <- alc %>%
  mutate(
    alc_use = (Dalc + Walc) / 2,
    high_use = alc_use > 2
  )

#Glimpse the data (it is saved to the “data folder” after the printed overview)

glimpse(alc)
## Observations: 382
## Variables: 36
## $ school     <fct> GP, GP, GP, GP, GP, GP, GP, GP, GP, GP, GP, GP, GP, G…
## $ sex        <fct> F, F, F, F, F, M, M, F, M, M, F, F, M, M, M, F, F, F,…
## $ age        <int> 18, 17, 15, 15, 16, 16, 16, 17, 15, 15, 15, 15, 15, 1…
## $ address    <fct> U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,…
## $ famsize    <fct> GT3, GT3, LE3, GT3, GT3, LE3, LE3, GT3, LE3, GT3, GT3…
## $ Pstatus    <fct> A, T, T, T, T, T, T, A, A, T, T, T, T, T, A, T, T, T,…
## $ Medu       <int> 4, 1, 1, 4, 3, 4, 2, 4, 3, 3, 4, 2, 4, 4, 2, 4, 4, 3,…
## $ Fedu       <int> 4, 1, 1, 2, 3, 3, 2, 4, 2, 4, 4, 1, 4, 3, 2, 4, 4, 3,…
## $ Mjob       <fct> at_home, at_home, at_home, health, other, services, o…
## $ Fjob       <fct> teacher, other, other, services, other, other, other,…
## $ reason     <fct> course, course, other, home, home, reputation, home, …
## $ nursery    <fct> yes, no, yes, yes, yes, yes, yes, yes, yes, yes, yes,…
## $ internet   <fct> no, yes, yes, yes, no, yes, yes, no, yes, yes, yes, y…
## $ X          <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16…
## $ guardian   <fct> mother, father, mother, mother, father, mother, mothe…
## $ traveltime <dbl> 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 3, 1, 2, 1, 1, 1, 3,…
## $ studytime  <dbl> 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 3, 1, 2, 3, 1, 3, 2,…
## $ failures   <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ schoolsup  <fct> yes, no, yes, no, no, no, no, yes, no, no, no, no, no…
## $ famsup     <fct> no, yes, no, yes, yes, yes, no, yes, yes, yes, yes, y…
## $ paid       <fct> no, no, yes, yes, yes, yes, no, no, yes, yes, yes, no…
## $ activities <fct> no, no, no, yes, no, yes, no, no, no, yes, no, yes, y…
## $ higher     <fct> yes, yes, yes, yes, yes, yes, yes, yes, yes, yes, yes…
## $ romantic   <fct> no, no, no, yes, no, no, no, no, no, no, no, no, no, …
## $ famrel     <dbl> 4, 5, 4, 3, 4, 5, 4, 4, 4, 5, 3, 5, 4, 5, 4, 4, 3, 5,…
## $ freetime   <dbl> 3, 3, 3, 2, 3, 4, 4, 1, 2, 5, 3, 2, 3, 4, 5, 4, 2, 3,…
## $ goout      <dbl> 4, 3, 2, 2, 2, 2, 4, 4, 2, 1, 3, 2, 3, 3, 2, 4, 3, 2,…
## $ Dalc       <dbl> 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ Walc       <dbl> 1, 1, 3, 1, 2, 2, 1, 1, 1, 1, 2, 1, 3, 2, 1, 2, 2, 1,…
## $ health     <dbl> 3, 3, 3, 5, 5, 5, 3, 1, 1, 5, 2, 4, 5, 3, 3, 2, 2, 4,…
## $ absences   <dbl> 5, 3, 8, 1, 2, 8, 0, 4, 0, 0, 1, 2, 1, 1, 0, 5, 8, 3,…
## $ G1         <dbl> 2, 7, 10, 14, 8, 14, 12, 8, 16, 13, 12, 10, 13, 11, 1…
## $ G2         <dbl> 8, 8, 10, 14, 12, 14, 12, 9, 17, 14, 11, 12, 14, 11, …
## $ G3         <dbl> 8, 8, 11, 14, 12, 14, 12, 10, 18, 14, 12, 12, 13, 12,…
## $ alc_use    <dbl> 1.0, 1.0, 2.5, 1.0, 1.5, 1.5, 1.0, 1.0, 1.0, 1.0, 1.5…
## $ high_use   <lgl> FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE…
# Save the joined data set to the project data folder.
# NOTE(review): write.csv() writes row names by default, so reading this file
# back with read.csv() yields an extra leading index column (named "X").
write.csv(alc, file = "~/IODS-project/data/alc")

#Read data and print out the names of the variables in the data

colnames(alc)
##  [1] "school"     "sex"        "age"        "address"    "famsize"   
##  [6] "Pstatus"    "Medu"       "Fedu"       "Mjob"       "Fjob"      
## [11] "reason"     "nursery"    "internet"   "X"          "guardian"  
## [16] "traveltime" "studytime"  "failures"   "schoolsup"  "famsup"    
## [21] "paid"       "activities" "higher"     "romantic"   "famrel"    
## [26] "freetime"   "goout"      "Dalc"       "Walc"       "health"    
## [31] "absences"   "G1"         "G2"         "G3"         "alc_use"   
## [36] "high_use"
dim(alc)
## [1] 382  36

#The data include 36 variables for 382 observations (students). They contain information about the students’ school (2 different schools), their parents’ education and work, school success and the students’ free time. In addition, the data include information about the students’ alcohol consumption.

#Study the relationships between high/low alcohol consumption and other variables in the data. I chose the variables mother’s education (Medu), extra-curricular activities (activities), wanting to take higher education (higher) and current health status (health). My hypothesis is that higher mother’s education and better current health status are associated with low alcohol consumption. Moreover, having extra-curricular activities (yes) and wanting to take higher education (yes) are related to lower alcohol consumption.

#Explore numerically and graphically the distributions of variables and their relationships with alcohol consumption.

# Means of the two numeric explanatory variables and of alcohol use.
mean(alc$Medu)
## [1] 2.806283
mean(alc$health)
## [1] 3.573298
mean(alc$alc_use)
## [1] 1.888743
# Counts per level for the two yes/no explanatory variables.
summary(alc$higher)
##  no yes 
##  18 364
summary(alc$activities)
##  no yes 
## 181 201
# Five-number summaries (plus mean) of the numeric variables.
summary(alc$Medu)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   2.000   3.000   2.806   4.000   4.000
summary(alc$health)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   3.000   4.000   3.573   5.000   5.000
summary(alc$alc_use)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.000   1.500   1.889   2.500   5.000
# Number of students with low (FALSE) and high (TRUE) alcohol use.
summary(alc$high_use)
##    Mode   FALSE    TRUE 
## logical     268     114
# Student counts per combination of mother's education and high_use.
# Because Medu is itself a grouping variable, mean_edu simply repeats the
# Medu value of each group (see the printed table).
alc %>% group_by(Medu, high_use) %>% summarise(count=n(), mean_edu=mean(Medu))
## # A tibble: 10 x 4
## # Groups:   Medu [5]
##     Medu high_use count mean_edu
##    <int> <lgl>    <int>    <dbl>
##  1     0 FALSE        1        0
##  2     0 TRUE         2        0
##  3     1 FALSE       33        1
##  4     1 TRUE        18        1
##  5     2 FALSE       80        2
##  6     2 TRUE        18        2
##  7     3 FALSE       59        3
##  8     3 TRUE        36        3
##  9     4 FALSE       95        4
## 10     4 TRUE        40        4

#The median of alcohol use is 1.5, and for 114 students the alcohol consumption (averaged over weekdays and weekends) is above 2 on the scale from 1 to 5. The mean of the mothers’ education is 2.8, which is close to 3 (secondary education). The mean of the students’ current health status is 3.57, slightly above the midpoint of the scale from 1 to 5. 201 students have extra-curricular activities and 364 students want to take higher education.

#Logistic regression analyses

# Logistic regression of high alcohol use on mother's education,
# extra-curricular activities, higher-education plans and health
m1 <- glm(
  high_use ~ Medu + activities + higher + health,
  family = "binomial",
  data = alc
)
summary(m1)
## 
## Call:
## glm(formula = high_use ~ Medu + activities + higher + health, 
##     family = "binomial", data = alc)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.3009  -0.8594  -0.7891   1.4521   1.7288  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)  
## (Intercept)   -0.43862    0.61980  -0.708   0.4791  
## Medu           0.06602    0.10623   0.622   0.5343  
## activitiesyes -0.22969    0.22812  -1.007   0.3140  
## higheryes     -0.86198    0.49672  -1.735   0.0827 .
## health         0.09205    0.08229   1.119   0.2633  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 465.68  on 381  degrees of freedom
## Residual deviance: 459.87  on 377  degrees of freedom
## AIC: 469.87
## 
## Number of Fisher Scoring iterations: 4
# Odds ratios: the exponentiated model coefficients
OR <- exp(coef(m1))
# 95% confidence intervals, also on the odds-ratio scale
CI <- exp(confint(m1))
## Waiting for profiling to be done...
# Odds ratios side by side with their confidence limits
cbind(OR, CI)
##                      OR     2.5 %   97.5 %
## (Intercept)   0.6449236 0.1884639 2.180536
## Medu          1.0682517 0.8686646 1.318535
## activitiesyes 0.7947811 0.5076034 1.243070
## higheryes     0.4223248 0.1571527 1.133624
## health        1.0964173 0.9346660 1.291396

#None of the selected variables was significantly associated with high alcohol use. #The odds ratios (exponentiated coefficients) for the variables were: Medu 1.07 (95% CI 0.87-1.32), activities (yes) 0.79 (0.51-1.24), higher (yes) 0.42 (0.16-1.13) and health 1.10 (0.93-1.29). If the associations were statistically significant, a one-unit increase in mother’s education or in health would increase the odds of high alcohol use by about 7% for Medu and about 10% for health. If a student wants to take higher education (yes) or has extra-curricular activities (yes), the odds of high alcohol use would be about 58% lower for higher education and about 21% lower for activities. #An exponentiated coefficient is the odds ratio associated with a one-unit change in the explanatory variable. That means that if health increases by one unit, the odds of being a high alcohol user are multiplied by 1.10. If the predictor variable is binary: a student who replied yes to the higher education question has 0.42 times the odds (58% lower odds) of being a high alcohol user compared with a student who replied no. Again, these interpretations are only indicative, because none of the p-values is statistically significant.

#I am using the variable mother education. #2x2 cross tabulation of predictors versus the actual values

library(ggplot2)
library(dplyr)
# Joint proportions of high_use by mother's education, with row and column totals
medu_by_use <- table(high_use = alc$high_use, Medu = alc$Medu)
addmargins(prop.table(medu_by_use))
##         Medu
## high_use           0           1           2           3           4
##    FALSE 0.002617801 0.086387435 0.209424084 0.154450262 0.248691099
##    TRUE  0.005235602 0.047120419 0.047120419 0.094240838 0.104712042
##    Sum   0.007853403 0.133507853 0.256544503 0.248691099 0.353403141
##         Medu
## high_use         Sum
##    FALSE 0.701570681
##    TRUE  0.298429319
##    Sum   1.000000000

#predict the probability of high_use

# Fitted probabilities of high_use from the logistic model m1
probabilities <- predict(m1, type = "response")

# Store the probabilities in 'alc' and classify a student as a high user
# when the fitted probability exceeds 0.5
alc <- alc %>%
  mutate(
    probability = probabilities,
    prediction = probability > 0.5
  )

# Cross-tabulate the observed high_use against the predicted class
with(alc, table(high_use = high_use, prediction = prediction))
##         prediction
## high_use FALSE TRUE
##    FALSE   263    5
##    TRUE    111    3

#initialize a plot of ‘high_use’ versus ‘probability’ in ‘alc’

# Plot of the observed high_use against the fitted probability, colored by predicted class
g <- ggplot(data = alc, mapping = aes(x = probability, y = high_use, col = prediction))

# Draw the observations as points

g + geom_point()

# Joint proportions of observed vs predicted class, with row and column totals

confusion <- table(high_use = alc$high_use, prediction = alc$prediction)
addmargins(prop.table(confusion))
##         prediction
## high_use       FALSE        TRUE         Sum
##    FALSE 0.688481675 0.013089005 0.701570681
##    TRUE  0.290575916 0.007853403 0.298429319
##    Sum   0.979057592 0.020942408 1.000000000

#The table shows 263 true negatives and 3 true positives. Likewise, there are 111 false negatives and 5 false positives.

#define a loss function (average prediction error)

# Proportion of misclassified observations.
#   class: observed outcome (0/1 or logical)
#   prob:  predicted probability of the outcome
# An observation counts as wrong when its predicted probability lies more
# than 0.5 away from the observed class.
loss_func <- function(class, prob) {
  mean(abs(class - prob) > 0.5)
}

#compute the average number of wrong predictions in the (training) data

with(alc, loss_func(class = high_use, prob = probability))
## [1] 0.3036649

# Cross-validation with K = nrow(alc) folds, i.e. leave-one-out

library(boot)
cv <- cv.glm(alc, m1, cost = loss_func, K = nrow(alc))

# Cross-validated proportion of wrong predictions

cv[["delta"]][1]
## [1] 0.3115183

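#The mean prediction error of my model is 0.30 on the training data and 0.31 in the cross-validation; therefore my model has worse test set performance than the comparison model from the course material. The lower the number, the better the model.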


#Load data and explore the structure and the dimensions of the data.
# MASS provides the Boston data set and lda(). Loading it masks
# dplyr::select, as the message below shows.
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
# Load the Boston data set into the workspace and inspect its structure.
data("Boston")
str(Boston)
## 'data.frame':    506 obs. of  14 variables:
##  $ crim   : num  0.00632 0.02731 0.02729 0.03237 0.06905 ...
##  $ zn     : num  18 0 0 0 0 0 12.5 12.5 12.5 12.5 ...
##  $ indus  : num  2.31 7.07 7.07 2.18 2.18 2.18 7.87 7.87 7.87 7.87 ...
##  $ chas   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ nox    : num  0.538 0.469 0.469 0.458 0.458 0.458 0.524 0.524 0.524 0.524 ...
##  $ rm     : num  6.58 6.42 7.18 7 7.15 ...
##  $ age    : num  65.2 78.9 61.1 45.8 54.2 58.7 66.6 96.1 100 85.9 ...
##  $ dis    : num  4.09 4.97 4.97 6.06 6.06 ...
##  $ rad    : int  1 2 2 3 3 3 5 5 5 5 ...
##  $ tax    : num  296 242 242 222 222 222 311 311 311 311 ...
##  $ ptratio: num  15.3 17.8 17.8 18.7 18.7 18.7 15.2 15.2 15.2 15.2 ...
##  $ black  : num  397 397 393 395 397 ...
##  $ lstat  : num  4.98 9.14 4.03 2.94 5.33 ...
##  $ medv   : num  24 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 ...
dim(Boston)
## [1] 506  14

#The Boston data include 506 observations and 14 variables. Most of the variables are numerical; one of them (chas) is a binary 0/1 variable, i.e. effectively categorical.

#Show a graphical overview of the data and show summaries of the variables in the data
summary(Boston)
##       crim                zn             indus            chas        
##  Min.   : 0.00632   Min.   :  0.00   Min.   : 0.46   Min.   :0.00000  
##  1st Qu.: 0.08204   1st Qu.:  0.00   1st Qu.: 5.19   1st Qu.:0.00000  
##  Median : 0.25651   Median :  0.00   Median : 9.69   Median :0.00000  
##  Mean   : 3.61352   Mean   : 11.36   Mean   :11.14   Mean   :0.06917  
##  3rd Qu.: 3.67708   3rd Qu.: 12.50   3rd Qu.:18.10   3rd Qu.:0.00000  
##  Max.   :88.97620   Max.   :100.00   Max.   :27.74   Max.   :1.00000  
##       nox               rm             age              dis        
##  Min.   :0.3850   Min.   :3.561   Min.   :  2.90   Min.   : 1.130  
##  1st Qu.:0.4490   1st Qu.:5.886   1st Qu.: 45.02   1st Qu.: 2.100  
##  Median :0.5380   Median :6.208   Median : 77.50   Median : 3.207  
##  Mean   :0.5547   Mean   :6.285   Mean   : 68.57   Mean   : 3.795  
##  3rd Qu.:0.6240   3rd Qu.:6.623   3rd Qu.: 94.08   3rd Qu.: 5.188  
##  Max.   :0.8710   Max.   :8.780   Max.   :100.00   Max.   :12.127  
##       rad              tax           ptratio          black       
##  Min.   : 1.000   Min.   :187.0   Min.   :12.60   Min.   :  0.32  
##  1st Qu.: 4.000   1st Qu.:279.0   1st Qu.:17.40   1st Qu.:375.38  
##  Median : 5.000   Median :330.0   Median :19.05   Median :391.44  
##  Mean   : 9.549   Mean   :408.2   Mean   :18.46   Mean   :356.67  
##  3rd Qu.:24.000   3rd Qu.:666.0   3rd Qu.:20.20   3rd Qu.:396.23  
##  Max.   :24.000   Max.   :711.0   Max.   :22.00   Max.   :396.90  
##      lstat            medv      
##  Min.   : 1.73   Min.   : 5.00  
##  1st Qu.: 6.95   1st Qu.:17.02  
##  Median :11.36   Median :21.20  
##  Mean   :12.65   Mean   :22.53  
##  3rd Qu.:16.95   3rd Qu.:25.00  
##  Max.   :37.97   Max.   :50.00
library(corrplot)
## corrplot 0.84 loaded
# Pairwise correlations between all Boston variables.
m <- cor(Boston)
# Draw the correlation matrix with one circle per variable pair.
corrplot(m, method = "circle")

#One of the strongest positive correlations appears to be between the nitrogen oxides concentration (nox) and the proportion of non-retail business acres per town (indus), whereas one of the strongest negative correlations is between age and the weighted mean of distances to five Boston employment centres (dis).

#Standardize the dataset and print out summaries of the scaled data.
# Standardize every column; with its defaults scale() centers each column
# and divides it by its standard deviation.
boston_scaled <- scale(Boston)
summary(boston_scaled)
##       crim                 zn               indus        
##  Min.   :-0.419367   Min.   :-0.48724   Min.   :-1.5563  
##  1st Qu.:-0.410563   1st Qu.:-0.48724   1st Qu.:-0.8668  
##  Median :-0.390280   Median :-0.48724   Median :-0.2109  
##  Mean   : 0.000000   Mean   : 0.00000   Mean   : 0.0000  
##  3rd Qu.: 0.007389   3rd Qu.: 0.04872   3rd Qu.: 1.0150  
##  Max.   : 9.924110   Max.   : 3.80047   Max.   : 2.4202  
##       chas              nox                rm               age         
##  Min.   :-0.2723   Min.   :-1.4644   Min.   :-3.8764   Min.   :-2.3331  
##  1st Qu.:-0.2723   1st Qu.:-0.9121   1st Qu.:-0.5681   1st Qu.:-0.8366  
##  Median :-0.2723   Median :-0.1441   Median :-0.1084   Median : 0.3171  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.:-0.2723   3rd Qu.: 0.5981   3rd Qu.: 0.4823   3rd Qu.: 0.9059  
##  Max.   : 3.6648   Max.   : 2.7296   Max.   : 3.5515   Max.   : 1.1164  
##       dis               rad               tax             ptratio       
##  Min.   :-1.2658   Min.   :-0.9819   Min.   :-1.3127   Min.   :-2.7047  
##  1st Qu.:-0.8049   1st Qu.:-0.6373   1st Qu.:-0.7668   1st Qu.:-0.4876  
##  Median :-0.2790   Median :-0.5225   Median :-0.4642   Median : 0.2746  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.6617   3rd Qu.: 1.6596   3rd Qu.: 1.5294   3rd Qu.: 0.8058  
##  Max.   : 3.9566   Max.   : 1.6596   Max.   : 1.7964   Max.   : 1.6372  
##      black             lstat              medv        
##  Min.   :-3.9033   Min.   :-1.5296   Min.   :-1.9063  
##  1st Qu.: 0.2049   1st Qu.:-0.7986   1st Qu.:-0.5989  
##  Median : 0.3808   Median :-0.1811   Median :-0.1449  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.4332   3rd Qu.: 0.6024   3rd Qu.: 0.2683  
##  Max.   : 0.4406   Max.   : 3.5453   Max.   : 2.9865
# scale() returned a matrix; convert it to a data frame so that columns can
# be accessed with $ in the steps that follow.
boston_scaled <- as.data.frame(boston_scaled)

#Means of variables are now 0.00 in every variable.

#Create a categorical variable of the crime rate
summary(boston_scaled$crim)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -0.419367 -0.410563 -0.390280  0.000000  0.007389  9.924110
# The quartiles of the scaled crime rate are the break points of the categories
bins <- quantile(boston_scaled$crim)
crime_labels <- c("low", "med_low", "med_high", "high")
crime <- cut(boston_scaled$crim, breaks = bins, labels = crime_labels, include.lowest = TRUE)
# Drop the old numeric crime rate and append the categorical one
boston_scaled <- data.frame(dplyr::select(boston_scaled, -crim), crime)
#Divide the dataset to train and test sets
# Randomly assign 80% of the rows to the training set.
n <- nrow(boston_scaled)
ind <- sample(n, size = n*0.8)
train <- boston_scaled[ind, ]
# The test set is the remaining 20% of the rows. It must exclude the
# training rows, otherwise the model is evaluated mostly on data it was
# fitted to and the test performance looks better than it is.
# NOTE: the summary(test) and cross-tabulation output recorded further down
# came from a run in which 'test' held all rows; re-run to refresh them.
test <- boston_scaled[-ind, ]
#Fit the linear discriminant analysis on the train set.
# Linear discriminant analysis with the crime category as the target and
# all other columns of the training set as predictors.
lda.fit <- lda(crime ~ ., data=train)
#Draw the LDA plot
# Numeric codes of the crime categories, used as point color and symbol.
classes <- as.numeric(train$crime)
plot(lda.fit, dimen = 2, col = classes, pch = classes)

# Overlay arrows for the coefficients of a fitted model on the current plot.
#   x           fitted object; its coef() must be a matrix with row names
#   myscale     multiplier applied to the arrow lengths
#   arrow_heads length of the arrow heads, passed to arrows()
#   color       color of the arrows and labels
#   tex         character expansion of the labels
#   choices     the two coefficient columns used as x and y coordinates
# Each arrow starts at the origin; its label is placed above the arrow tip.
lda.arrows <- function(x, myscale = 1, arrow_heads = 0.1, color = "red", tex = 0.75, choices = c(1,2)){
  loadings <- coef(x)
  tip_x <- myscale * loadings[, choices[1]]
  tip_y <- myscale * loadings[, choices[2]]
  arrows(x0 = 0, y0 = 0, x1 = tip_x, y1 = tip_y,
         col = color, length = arrow_heads)
  text(myscale * loadings[, choices], labels = row.names(loadings),
       cex = tex, col = color, pos = 3)
}

# Redraw the LDA plot and overlay the coefficient arrows on it.
plot(lda.fit, dimen = 2, col = classes, pch = classes) 
lda.arrows(lda.fit, myscale = 1)

#Save the crime categories from the test set and then remove the categorical crime variable from the dataset.
# Keep the true crime categories aside, then remove them from the test data
crime_cat <- test[["crime"]]
test <- test %>% dplyr::select(-crime)
summary(test)
##        zn               indus              chas              nox         
##  Min.   :-0.48724   Min.   :-1.5563   Min.   :-0.2723   Min.   :-1.4644  
##  1st Qu.:-0.48724   1st Qu.:-0.8668   1st Qu.:-0.2723   1st Qu.:-0.9121  
##  Median :-0.48724   Median :-0.2109   Median :-0.2723   Median :-0.1441  
##  Mean   : 0.00000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.04872   3rd Qu.: 1.0150   3rd Qu.:-0.2723   3rd Qu.: 0.5981  
##  Max.   : 3.80047   Max.   : 2.4202   Max.   : 3.6648   Max.   : 2.7296  
##        rm               age               dis               rad         
##  Min.   :-3.8764   Min.   :-2.3331   Min.   :-1.2658   Min.   :-0.9819  
##  1st Qu.:-0.5681   1st Qu.:-0.8366   1st Qu.:-0.8049   1st Qu.:-0.6373  
##  Median :-0.1084   Median : 0.3171   Median :-0.2790   Median :-0.5225  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.4823   3rd Qu.: 0.9059   3rd Qu.: 0.6617   3rd Qu.: 1.6596  
##  Max.   : 3.5515   Max.   : 1.1164   Max.   : 3.9566   Max.   : 1.6596  
##       tax             ptratio            black             lstat        
##  Min.   :-1.3127   Min.   :-2.7047   Min.   :-3.9033   Min.   :-1.5296  
##  1st Qu.:-0.7668   1st Qu.:-0.4876   1st Qu.: 0.2049   1st Qu.:-0.7986  
##  Median :-0.4642   Median : 0.2746   Median : 0.3808   Median :-0.1811  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 1.5294   3rd Qu.: 0.8058   3rd Qu.: 0.4332   3rd Qu.: 0.6024  
##  Max.   : 1.7964   Max.   : 1.6372   Max.   : 0.4406   Max.   : 3.5453  
##       medv        
##  Min.   :-1.9063  
##  1st Qu.:-0.5989  
##  Median :-0.1449  
##  Mean   : 0.0000  
##  3rd Qu.: 0.2683  
##  Max.   : 2.9865
#Predict the classes with the LDA model on the test data. Cross tabulate the results.
# Predicted classes for the test data, cross-tabulated against the true categories
lda.pred <- predict(lda.fit, newdata = test)
table(correct = crime_cat, predicted = lda.pred[["class"]])
##           predicted
## correct    low med_low med_high high
##   low       69      52        6    0
##   med_low   17      93       16    0
##   med_high   1      45       74    6
##   high       0       0        1  126
#Reload the Boston dataset and standardize it. Calculate the distances between the observations. Run k-means and investigate the optimal number of clusters. Visualize the clusters.
# Fresh copy of the data, standardized
data("Boston")
Boston2 <- scale(Boston)
# Euclidean distances between all pairs of observations
dist_eu <- dist(Boston2, method = "euclidean")
summary(dist_eu)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.1343  3.4625  4.8241  4.9111  6.1863 14.3970
# Fix the random seed so that the k-means results are reproducible
set.seed(123)
# Largest number of clusters to try
k_max <- 10
# Total within-cluster sum of squares for k = 1, ..., k_max.
# vapply() guarantees a numeric vector with exactly one value per k.
twcss <- vapply(
  seq_len(k_max),
  function(k) kmeans(Boston2, k)$tot.withinss,
  numeric(1)
)

library(ggplot2)
# Line plot of twcss against the number of clusters
# (ggplot() + geom_line() replaces the deprecated qplot())
ggplot(data.frame(k = seq_len(k_max), twcss = twcss), aes(x = k, y = twcss)) +
  geom_line()

# Final clustering with two centers, shown as a scatterplot matrix colored by cluster
km <- kmeans(Boston2, centers = 2)
pairs(Boston2, col= km$cluster)

#In the line plot, the total within-cluster sum of squares (twcss) drops sharply at 2 clusters; therefore 2 is the optimal number of clusters


#Data source:

# Download the two data sets. In the gender inequality file ".." marks a
# missing value. TRUE/FALSE are spelled out because T/F can be reassigned.
hd <- read.csv("http://s3.amazonaws.com/assets.datacamp.com/production/course_2218/datasets/human_development.csv", stringsAsFactors = FALSE)
gii <- read.csv("http://s3.amazonaws.com/assets.datacamp.com/production/course_2218/datasets/gender_inequality.csv", stringsAsFactors = FALSE, na.strings = "..")

# Short names for every column except the second one (the country name),
# which keeps its original name.
names(hd)[c(1, 3:8)] <- c(
  "HDI", "Human.HDI", "Life.exp", "Years.exp", "Years.mean", "GNI", "GNI.calc"
)

names(gii)[c(1, 3:10)] <- c(
  "GII", "gender.GII", "mater.mor", "adol.birth", "rep.parl",
  "sec.edu.F", "sec.edu.M", "lab.F", "lab.M"
)

# Female-to-male ratios of secondary education and of labour force participation
gii$edu2.FM <- gii$sec.edu.F/gii$sec.edu.M
gii$lab.FM <- gii$lab.F/gii$lab.M
names(gii)
##  [1] "GII"        "Country"    "gender.GII" "mater.mor"  "adol.birth"
##  [6] "rep.parl"   "sec.edu.F"  "sec.edu.M"  "lab.F"      "lab.M"     
## [11] "edu2.FM"    "lab.FM"
# Join the two data sets on the country name, keeping only countries that
# occur in both. The key is given explicitly through 'by'; merge() has no
# 'inner_join' argument, so the earlier 'inner_join=Country' was silently
# ignored and the join relied on the default of matching shared column names.
human <- merge(gii, hd, by = "Country")
dim(human)
## [1] 195  19
# Save the joined data and print it back as a check
write.csv(human, file = "~/IODS-project/data/human")
read.csv(file = "~/IODS-project/data/human")
##       X                                   Country GII gender.GII mater.mor
## 1     1                               Afghanistan 171      0.693       400
## 2     2                                   Albania  85      0.217        21
## 3     3                                   Algeria  83      0.413        89
## 4     4                                   Andorra  34         NA        NA
## 5     5                                    Angola 149         NA       460
## 6     6                       Antigua and Barbuda  58         NA        NA
## 7     7                               Arab States  NA      0.537       155
## 8     8                                 Argentina  40      0.376        69
## 9     9                                   Armenia  85      0.318        29
## 10   10                                 Australia   2      0.110         6
## 11   11                                   Austria  23      0.053         4
## 12   12                                Azerbaijan  78      0.303        26
## 13   13                                   Bahamas  55      0.298        37
## 14   14                                   Bahrain  45      0.265        22
## 15   15                                Bangladesh 142      0.503       170
## 16   16                                  Barbados  57      0.357        52
## 17   17                                   Belarus  50      0.151         1
## 18   18                                   Belgium  21      0.063         6
## 19   19                                    Belize 101      0.426        45
## 20   20                                     Benin 166      0.614       340
## 21   21                                    Bhutan 132      0.457       120
## 22   22          Bolivia (Plurinational State of) 119      0.444       200
## 23   23                    Bosnia and Herzegovina  85      0.201         8
## 24   24                                  Botswana 106      0.480       170
## 25   25                                    Brazil  75      0.457        69
## 26   26                         Brunei Darussalam  31         NA        27
## 27   27                                  Bulgaria  59      0.212         5
## 28   28                              Burkina Faso 183      0.631       400
## 29   29                                   Burundi 184      0.492       740
## 30   30                                Cabo Verde 122         NA        53
## 31   31                                  Cambodia 143      0.477       170
## 32   32                                  Cameroon 153      0.587       590
## 33   33                                    Canada   9      0.129        11
## 34   34                  Central African Republic 187      0.655       880
## 35   35                                      Chad 185      0.706       980
## 36   36                                     Chile  42      0.338        22
## 37   37                                     China  90      0.191        32
## 38   38                                  Colombia  97      0.429        83
## 39   39                                   Comoros 159         NA       350
## 40   40                                     Congo 136      0.593       410
## 41   41        Congo (Democratic Republic of the) 176      0.673       730
## 42   42                                Costa Rica  69      0.349        38
## 43   43                             Côte d'Ivoire 172      0.679       720
## 44   44                                   Croatia  47      0.149        13
## 45   45                                      Cuba  67      0.356        80
## 46   46                                    Cyprus  32      0.124        10
## 47   47                            Czech Republic  28      0.091         5
## 48   48                                   Denmark   4      0.048         5
## 49   49                                  Djibouti 168         NA       230
## 50   50                                  Dominica  94         NA        NA
## 51   51                        Dominican Republic 101      0.477       100
## 52   52                 East Asia and the Pacific  NA      0.328        72
## 53   53                                   Ecuador  88      0.407        87
## 54   54                                     Egypt 108      0.573        45
## 55   55                               El Salvador 116      0.427        69
## 56   56                         Equatorial Guinea 138         NA       290
## 57   57                                   Eritrea 186         NA       380
## 58   58                                   Estonia  30      0.164        11
## 59   59                                  Ethiopia 174      0.558       420
## 60   60                   Europe and Central Asia  NA      0.300        28
## 61   61                                      Fiji  90      0.418        59
## 62   62                                   Finland  24      0.075         4
## 63   63                                    France  22      0.088        12
## 64   64                                     Gabon 110      0.514       240
## 65   65                                    Gambia 175      0.622       430
## 66   66                                   Georgia  76      0.382        41
## 67   67                                   Germany   6      0.041         7
## 68   68                                     Ghana 140      0.554       380
## 69   69                                    Greece  29      0.146         5
## 70   70                                   Grenada  79         NA        23
## 71   71                                 Guatemala 128      0.533       140
## 72   72                                    Guinea 182         NA       650
## 73   73                             Guinea-Bissau 178         NA       560
## 74   74                                    Guyana 124      0.515       250
## 75   75                                     Haiti 163      0.603       380
## 76   76                                  Honduras 131      0.480       120
## 77   77                    Hong Kong, China (SAR)  12         NA        NA
## 78   78                                   Hungary  44      0.209        14
## 79   79                                   Iceland  16      0.087         4
## 80   80                                     India 130      0.563       190
## 81   81                                 Indonesia 110      0.494       190
## 82   82                Iran (Islamic Republic of)  69      0.515        23
## 83   83                                      Iraq 121      0.539        67
## 84   84                                   Ireland   6      0.113         9
## 85   85                                    Israel  18      0.101         2
## 86   86                                     Italy  27      0.068         4
## 87   87                                   Jamaica  99      0.430        80
## 88   88                                     Japan  20      0.133         6
## 89   89                                    Jordan  80      0.473        50
## 90   90                                Kazakhstan  56      0.267        26
## 91   91                                     Kenya 145      0.552       400
## 92   92                                  Kiribati 137         NA       130
## 93   93                       Korea (Republic of)  17      0.125        27
## 94   94                                    Kuwait  48      0.387        14
## 95   95                                Kyrgyzstan 120      0.353        75
## 96   96          Lao People's Democratic Republic 141         NA        NA
## 97   97           Latin America and the Caribbean  NA      0.415        85
## 98   98                                    Latvia  46      0.167        13
## 99   99                                   Lebanon  67      0.385        16
## 100 100                                   Lesotho 161      0.541       490
## 101 101                                   Liberia 177      0.651       640
## 102 102                                     Libya  94      0.134        15
## 103 103                             Liechtenstein  13         NA        NA
## 104 104                                 Lithuania  37      0.125        11
## 105 105                                Luxembourg  19      0.100        11
## 106 106                                Madagascar 154         NA       440
## 107 107                                    Malawi 173      0.611       510
## 108 108                                  Malaysia  62      0.209        29
## 109 109                                  Maldives 104      0.243        31
## 110 110                                      Mali 179      0.677       550
## 111 111                                     Malta  37      0.227         9
## 112 112                                Mauritania 156      0.610       320
## 113 113                                 Mauritius  63      0.419        73
## 114 114                                    Mexico  74      0.373        49
## 115 115          Micronesia (Federated States of) 123         NA        96
## 116 116                     Moldova (Republic of) 107      0.248        21
## 117 117                                  Mongolia  90      0.325        68
## 118 118                                Montenegro  49      0.171         7
## 119 119                                   Morocco 126      0.525       120
## 120 120                                Mozambique 180      0.591       480
## 121 121                                   Myanmar 148      0.413       200
## 122 122                                   Namibia 126      0.401       130
## 123 123                                     Nepal 145      0.489       190
## 124 124                               Netherlands   5      0.062         6
## 125 125                               New Zealand   9      0.157         8
## 126 126                                 Nicaragua 125      0.449       100
## 127 127                                     Niger 188      0.713       630
## 128 128                                   Nigeria 152         NA       560
## 129 129                                    Norway   1      0.067         4
## 130 130                                      Oman  52      0.275        11
## 131 131                                  Pakistan 147      0.536       170
## 132 132                                     Palau  60         NA        NA
## 133 133                       Palestine, State of 113         NA        NA
## 134 134                                    Panama  60      0.454        85
## 135 135                          Papua New Guinea 158      0.611       220
## 136 136                                  Paraguay 112      0.472       110
## 137 137                                      Peru  84      0.406        89
## 138 138                               Philippines 115      0.420       120
## 139 139                                    Poland  36      0.138         3
## 140 140                                  Portugal  43      0.111         8
## 141 141                                     Qatar  32      0.524         6
## 142 142                                   Romania  52      0.333        33
## 143 143                        Russian Federation  50      0.276        24
## 144 144                                    Rwanda 163      0.400       320
## 145 145                     Saint Kitts and Nevis  77         NA        NA
## 146 146                               Saint Lucia  89         NA        34
## 147 147          Saint Vincent and the Grenadines  97         NA        45
## 148 148                                     Samoa 105      0.457        58
## 149 149                     Sao Tome and Principe 143         NA       210
## 150 150                              Saudi Arabia  39      0.284        16
## 151 151                                   Senegal 170      0.528       320
## 152 152                                    Serbia  66      0.176        16
## 153 153                                Seychelles  64         NA        NA
## 154 154                              Sierra Leone 181      0.650      1100
## 155 155                                 Singapore  11      0.088         6
## 156 156                                  Slovakia  35      0.164         7
## 157 157                                  Slovenia  25      0.016         7
## 158 158                           Solomon Islands 156         NA       130
## 159 159                              South Africa 116      0.407       140
## 160 160                                South Asia  NA      0.536       183
## 161 161                               South Sudan 169         NA       730
## 162 162                                     Spain  26      0.095         4
## 163 163                                 Sri Lanka  73      0.370        29
## 164 164                        Sub-Saharan Africa  NA      0.575       506
## 165 165                                     Sudan 167      0.591       360
## 166 166                                  Suriname 103      0.463       130
## 167 167                                 Swaziland 150      0.557       310
## 168 168                                    Sweden  14      0.055         4
## 169 169                               Switzerland   3      0.028         6
## 170 170                      Syrian Arab Republic 134      0.533        49
## 171 171                                Tajikistan 129      0.357        44
## 172 172             Tanzania (United Republic of) 151      0.547       410
## 173 173                                  Thailand  93      0.380        26
## 174 174 The former Yugoslav Republic of Macedonia  81      0.164         7
## 175 175                               Timor-Leste 133         NA       270
## 176 176                                      Togo 162      0.588       450
## 177 177                                     Tonga 100      0.666       120
## 178 178                       Trinidad and Tobago  64      0.371        84
## 179 179                                   Tunisia  96      0.240        46
## 180 180                                    Turkey  72      0.359        20
## 181 181                              Turkmenistan 109         NA        61
## 182 182                                    Uganda 163      0.538       360
## 183 183                                   Ukraine  81      0.286        23
## 184 184                      United Arab Emirates  41      0.232         8
## 185 185                            United Kingdom  14      0.177         8
## 186 186                             United States   8      0.280        28
## 187 187                                   Uruguay  52      0.313        14
## 188 188                                Uzbekistan 114         NA        36
## 189 189                                   Vanuatu 134         NA        86
## 190 190        Venezuela (Bolivarian Republic of)  71      0.476       110
## 191 191                                  Viet Nam 116      0.308        49
## 192 192                                     World  NA      0.449       210
## 193 193                                     Yemen 160      0.744       270
## 194 194                                    Zambia 139      0.587       280
## 195 195                                  Zimbabwe 155      0.504       470
##     adol.birth rep.parl sec.edu.F sec.edu.M lab.F lab.M   edu2.FM
## 1         86.8     27.6       5.9      29.8  15.8  79.5 0.1979866
## 2         15.3     20.7      81.8      87.9  44.9  65.5 0.9306030
## 3         10.0     25.7      26.7      31.0  15.2  72.2 0.8612903
## 4           NA     50.0      49.5      49.3    NA    NA 1.0040568
## 5        170.2     36.8        NA        NA  63.3  76.9        NA
## 6         49.3     25.7        NA        NA    NA    NA        NA
## 7         45.4     14.0      34.7      47.6  23.2  75.3 0.7289916
## 8         54.4     36.8      56.3      57.6  47.5  75.0 0.9774306
## 9         27.1     10.7      94.0      95.0  54.2  72.6 0.9894737
## 10        12.1     30.5      94.3      94.6  58.8  71.8 0.9968288
## 11         4.1     30.3     100.0     100.0  54.6  67.7 1.0000000
## 12        40.0     15.6      93.7      97.4  62.9  69.6 0.9620123
## 13        28.5     16.7      91.2      87.6  69.3  79.3 1.0410959
## 14        13.8     15.0      56.7      51.4  39.2  86.9 1.1031128
## 15        80.6     20.0      34.1      41.3  57.4  84.1 0.8256659
## 16        48.4     19.6      89.5      87.7  65.9  76.6 1.0205245
## 17        20.6     30.1      87.0      92.2  50.1  63.1 0.9436009
## 18         6.7     42.4      77.5      82.9  47.5  59.3 0.9348613
## 19        71.4     13.3      76.4      75.8  49.2  82.3 1.0079156
## 20        90.2      8.4      11.3      27.0  67.6  78.3 0.4185185
## 21        40.9      8.3      34.0      34.5  66.7  77.2 0.9855072
## 22        71.9     51.8      47.6      59.1  64.2  80.9 0.8054146
## 23        15.1     19.3      44.9      69.8  34.1  57.3 0.6432665
## 24        44.2      9.5      73.6      77.9  71.9  81.6 0.9448010
## 25        70.8      9.6      54.6      52.4  59.4  80.8 1.0419847
## 26        23.0       NA      63.9      67.8  52.6  75.3 0.9424779
## 27        35.9     20.4      93.0      95.7  47.9  59.0 0.9717868
## 28       115.4     13.3       0.9       3.2  77.1  90.0 0.2812500
## 29        30.3     34.9       5.3       8.3  83.3  82.0 0.6385542
## 30        70.6     20.8        NA        NA  51.5  83.7        NA
## 31        44.3     19.0       9.9      22.9  78.8  86.5 0.4323144
## 32       115.8     27.1      21.3      34.9  63.8  76.8 0.6103152
## 33        14.5     28.2     100.0     100.0  61.6  71.0 1.0000000
## 34        98.3     12.5      10.1      26.7  72.6  85.1 0.3782772
## 35       152.0     14.9       1.7       9.9  64.0  79.2 0.1717172
## 36        55.3     15.8      73.3      76.4  49.2  74.8 0.9594241
## 37         8.6     23.6      58.7      71.9  63.9  78.3 0.8164117
## 38        68.5     20.9      56.9      55.6  55.8  79.7 1.0233813
## 39        51.1      3.0        NA        NA  35.2  80.1        NA
## 40       126.7     11.5      39.7      47.0  68.5  73.0 0.8446809
## 41       135.3      8.2      12.8      32.4  70.7  73.2 0.3950617
## 42        60.8     33.3      50.7      50.5  46.6  79.0 1.0039604
## 43       130.3      9.2      14.0      30.1  52.4  81.4 0.4651163
## 44        12.7     25.8      85.0      93.6  44.7  58.4 0.9081197
## 45        43.1     48.9      74.3      78.8  43.4  70.0 0.9428934
## 46         5.5     12.5      76.0      81.7  56.0  71.1 0.9302326
## 47         4.9     18.9      99.9      99.7  51.1  68.3 1.0020060
## 48         5.1     38.0      95.5      96.6  58.7  66.4 0.9886128
## 49        18.6     12.7        NA        NA  36.3  67.7        NA
## 50          NA     21.9      29.7      23.2    NA    NA 1.2801724
## 51        99.6     19.1      55.6      53.1  51.3  78.6 1.0470810
## 52        21.2     18.7      54.7      66.3  62.6  79.4 0.8250377
## 53        77.0     41.6      40.1      39.4  54.7  82.7 1.0177665
## 54        43.0      2.2      43.9      60.6  23.7  74.8 0.7244224
## 55        76.0     27.4      36.8      43.6  47.8  79.0 0.8440367
## 56       112.6     19.7        NA        NA  80.7  92.2        NA
## 57        65.3     22.0        NA        NA  80.0  89.8        NA
## 58        16.8     19.8     100.0     100.0  56.2  68.9 1.0000000
## 59        78.4     25.5       7.8      18.2  78.2  89.3 0.4285714
## 60        30.8     19.0      70.8      80.6  45.6  70.0 0.8784119
## 61        42.8     14.0      64.2      64.5  37.5  72.0 0.9953488
## 62         9.2     42.5     100.0     100.0  55.7  64.0 1.0000000
## 63         5.7     25.7      78.0      83.2  50.7  61.6 0.9375000
## 64       103.0     16.2      53.9      36.1  56.2  65.4 1.4930748
## 65       115.8      9.4      17.4      31.5  72.2  82.9 0.5523810
## 66        46.8     11.3      89.7      92.7  56.5  75.1 0.9676375
## 67         3.8     36.9      96.3      97.0  53.6  66.4 0.9927835
## 68        58.4     10.9      45.2      64.7  67.3  71.4 0.6986090
## 69        11.9     21.0      59.5      67.0  44.2  62.5 0.8880597
## 70        35.4     25.0        NA        NA    NA    NA        NA
## 71        97.2     13.3      21.9      23.2  49.3  88.2 0.9439655
## 72       131.0     21.9        NA        NA  65.6  78.3        NA
## 73        99.3     13.7        NA        NA  68.2  78.5        NA
## 74        88.5     31.3      60.3      47.8  42.6  80.5 1.2615063
## 75        42.0      3.5      22.4      35.2  60.9  71.0 0.6363636
## 76        84.0     25.8      28.0      25.8  42.8  82.9 1.0852713
## 77         3.3       NA      72.2      79.2  51.3  67.8 0.9116162
## 78        12.1     10.1      97.9      98.7  44.8  60.0 0.9918946
## 79        11.5     41.3      91.0      91.6  70.5  77.4 0.9934498
## 80        32.8     12.2      27.0      56.6  27.0  79.9 0.4770318
## 81        48.3     17.1      39.9      49.2  51.4  84.2 0.8109756
## 82        31.6      3.1      62.2      67.6  16.6  73.6 0.9201183
## 83        68.7     26.5      27.8      50.2  14.9  69.8 0.5537849
## 84         8.2     19.9      80.5      78.6  53.1  68.1 1.0241730
## 85         7.8     22.5      84.4      87.3  57.9  69.1 0.9667812
## 86         4.0     30.1      71.2      80.5  39.6  59.5 0.8844720
## 87        70.1     16.7      74.0      70.2  56.1  70.9 1.0541311
## 88         5.4     11.6      87.0      85.8  48.8  70.4 1.0139860
## 89        26.5     11.6      69.5      78.5  15.6  66.6 0.8853503
## 90        29.9     20.1      95.3      98.8  67.7  77.9 0.9645749
## 91        93.6     20.8      25.3      31.4  62.2  72.4 0.8057325
## 92        16.6      8.7        NA        NA    NA    NA        NA
## 93         2.2     16.3      77.0      89.1  50.1  72.1 0.8641975
## 94        14.5      1.5      55.6      56.3  43.6  83.1 0.9875666
## 95        29.3     23.3      94.5      96.8  56.0  79.5 0.9762397
## 96        65.0     25.0      22.9      37.0  76.3  79.1 0.6189189
## 97        68.3     27.0      54.3      55.2  53.7  79.8 0.9836957
## 98        13.5     18.0      98.9      99.0  54.9  67.6 0.9989899
## 99        12.0      3.1      53.0      55.4  23.3  70.9 0.9566787
## 100       89.4     26.8      21.9      19.0  59.0  73.5 1.1526316
## 101      117.4     10.7      15.4      39.3  58.2  64.8 0.3918575
## 102        2.5     16.0      55.5      41.9  30.0  76.4 1.3245823
## 103         NA     20.0        NA        NA    NA    NA        NA
## 104       10.6     23.4      89.1      94.3  55.8  67.3 0.9448568
## 105        8.3     28.3     100.0     100.0  50.7  64.6 1.0000000
## 106      122.8     20.5        NA        NA  86.6  90.5        NA
## 107      144.8     16.7      11.1      21.6  84.6  81.5 0.5138889
## 108        5.7     14.2      65.1      71.3  44.4  75.5 0.9130435
## 109        4.2      5.9      27.3      32.7  56.2  77.5 0.8348624
## 110      175.6      9.5       7.7      15.1  50.8  81.4 0.5099338
## 111       18.2     13.0      68.6      78.2  37.9  66.3 0.8772379
## 112       73.3     22.2       8.3      20.9  28.7  79.1 0.3971292
## 113       30.9     11.6      49.4      58.0  43.6  74.2 0.8517241
## 114       63.4     37.1      55.7      60.6  45.1  79.9 0.9191419
## 115       18.6      0.0        NA        NA    NA    NA        NA
## 116       29.3     20.8      93.6      96.6  37.6  44.2 0.9689441
## 117       18.7     14.9      85.3      84.1  56.6  69.3 1.0142687
## 118       15.2     17.3      84.2      94.7  43.0  57.3 0.8891235
## 119       35.8     11.0      20.7      30.2  26.5  75.8 0.6854305
## 120      137.8     39.6       1.4       6.2  85.5  82.8 0.2258065
## 121       12.1      4.7      22.9      15.3  75.2  82.3 1.4967320
## 122       54.9     37.7      33.3      34.4  54.7  63.7 0.9680233
## 123       73.7     29.5      17.7      38.2  79.9  87.1 0.4633508
## 124        6.2     36.9      87.7      90.5  58.5  70.6 0.9690608
## 125       25.3     31.4      95.0      95.3  62.0  73.8 0.9968520
## 126      100.8     39.1      39.4      38.3  47.4  80.3 1.0287206
## 127      204.8     13.3       2.4       7.8  40.0  89.7 0.3076923
## 128      119.6      6.6        NA        NA  48.2  63.7        NA
## 129        7.8     39.6      97.4      96.7  61.2  68.7 1.0072389
## 130       10.6      9.6      47.2      57.1  29.0  82.6 0.8266200
## 131       27.3     19.7      19.3      46.1  24.6  82.9 0.4186551
## 132         NA     10.3        NA        NA    NA    NA        NA
## 133       45.8       NA      53.9      59.4  15.4  66.4 0.9074074
## 134       78.5     19.3      54.0      49.9  49.0  81.8 1.0821643
## 135       62.1      2.7       7.6      14.5  70.5  74.0 0.5241379
## 136       67.0     16.8      36.8      43.0  55.7  84.8 0.8558140
## 137       50.7     22.3      56.3      66.1  68.2  84.4 0.8517398
## 138       46.8     27.1      65.9      63.7  51.1  79.7 1.0345369
## 139       12.2     22.1      79.4      85.5  48.9  64.9 0.9286550
## 140       12.6     31.3      47.7      48.2  54.9  66.2 0.9896266
## 141        9.5      0.0      66.7      59.0  50.8  95.5 1.1305085
## 142       31.0     12.0      86.1      92.0  48.7  64.9 0.9358696
## 143       25.7     14.5      89.6      92.5  57.1  71.7 0.9686486
## 144       33.6     57.5       8.0       8.8  86.4  85.3 0.9090909
## 145         NA      6.7        NA        NA    NA    NA        NA
## 146       56.3     20.7        NA        NA  62.7  76.2        NA
## 147       54.5     13.0        NA        NA  55.7  78.0        NA
## 148       28.3      6.1      64.3      60.0  23.5  58.4 1.0716667
## 149       65.1     18.2        NA        NA  45.3  77.8        NA
## 150       10.2     19.9      60.5      70.3  20.2  78.3 0.8605974
## 151       94.4     42.7       7.2      15.4  66.0  88.0 0.4675325
## 152       16.9     34.0      58.4      73.6  44.5  60.9 0.7934783
## 153       56.3     43.8      66.9      66.6    NA    NA 1.0045045
## 154      100.7     12.4      10.0      21.7  65.7  69.0 0.4608295
## 155        6.0     25.3      74.1      81.0  58.8  77.2 0.9148148
## 156       15.9     18.7      99.1      99.5  51.1  68.6 0.9959799
## 157        0.6     27.7      95.8      98.0  52.3  63.2 0.9775510
## 158       64.9      2.0        NA        NA  53.4  79.0        NA
## 159       50.9     40.7      72.7      75.9  44.5  60.5 0.9578393
## 160       38.7     17.5      29.1      54.6  29.8  80.3 0.5329670
## 161       75.3     24.3        NA        NA    NA    NA        NA
## 162       10.6     38.0      66.8      73.1  52.5  65.8 0.9138167
## 163       16.9      5.8      72.7      76.4  35.1  76.3 0.9515707
## 164      109.7     22.5      22.1      31.5  65.4  76.6 0.7015873
## 165       84.0     23.8      12.1      18.2  31.3  76.0 0.6648352
## 166       35.2     11.8      44.6      47.1  40.5  68.8 0.9469214
## 167       72.0     14.7      21.9      26.0  43.9  71.6 0.8423077
## 168        6.5     43.6      86.5      87.3  60.3  67.9 0.9908362
## 169        1.9     28.5      95.0      96.6  61.8  74.9 0.9834369
## 170       41.6     12.4      29.5      40.5  13.5  72.7 0.7283951
## 171       42.8     15.2      95.1      91.2  58.9  77.1 1.0427632
## 172      122.7     36.0       5.6       9.5  88.1  90.2 0.5894737
## 173       41.0      6.1      35.7      40.8  64.3  80.7 0.8750000
## 174       18.3     33.3      40.2      55.6  43.1  67.5 0.7230216
## 175       52.2     38.5        NA        NA  24.6  50.8        NA
## 176       91.5     17.6      16.1      40.3  80.6  81.3 0.3995037
## 177       18.1      0.0      87.5      88.3  53.5  74.6 0.9909400
## 178       34.8     24.7      59.7      60.9  53.0  75.5 0.9802956
## 179        4.6     31.3      32.8      46.1  25.1  70.9 0.7114967
## 180       30.9     14.4      39.0      60.0  29.4  70.8 0.6500000
## 181       18.0     25.8        NA        NA  46.9  76.9        NA
## 182      126.6     35.0      22.9      33.5  75.8  79.2 0.6835821
## 183       25.7     11.8      91.7      95.9  53.2  66.9 0.9562044
## 184       27.6     17.5      73.1      61.2  46.5  92.0 1.1944444
## 185       25.8     23.5      99.8      99.9  55.7  68.7 0.9989990
## 186       31.0     19.4      95.1      94.8  56.3  68.9 1.0031646
## 187       58.3     11.5      54.4      50.3  55.6  76.8 1.0815109
## 188       38.8     16.4        NA        NA  48.1  75.6        NA
## 189       44.8      0.0        NA        NA  61.5  80.0        NA
## 190       83.2     17.0      56.6      50.8  51.1  79.2 1.1141732
## 191       29.0     24.3      59.4      71.2  73.0  82.2 0.8342697
## 192       47.4     21.8      54.5      65.4  50.3  76.7 0.8333333
## 193       47.0      0.7       8.6      26.7  25.4  72.2 0.3220974
## 194      125.4     12.7      25.8      44.0  73.1  85.6 0.5863636
## 195       60.3     35.1      48.7      62.0  83.2  89.7 0.7854839
##        lab.FM HDI Human.HDI Life.exp Years.exp Years.mean     GNI GNI.calc
## 1   0.1987421 171     0.465     60.4       9.3        3.2   1,885       -7
## 2   0.6854962  85     0.733     77.8      11.8        9.3   9,943       14
## 3   0.2105263  83     0.736     74.8      14.0        7.6  13,054       -1
## 4          NA  34     0.845     81.3      13.5        9.6  43,978      -18
## 5   0.8231469 149     0.532     52.3      11.4        4.7   6,822      -30
## 6          NA  58     0.783     76.1      14.0        9.2  20,070       -1
## 7   0.3081009  NA     0.686     70.6      12.0        6.4  15,722       NA
## 8   0.6333333  40     0.836     76.3      17.9        9.8  22,050       11
## 9   0.7465565  85     0.733     74.7      12.3       10.9   8,124       22
## 10  0.8189415   2     0.935     82.4      20.2       13.0  42,261       17
## 11  0.8064993  23     0.885     81.4      15.7       10.8  43,869       -5
## 12  0.9037356  78     0.751     70.8      11.9       11.2  16,428      -11
## 13  0.8738966  55     0.790     75.4      12.6       10.9  21,336       -3
## 14  0.4510932  45     0.824     76.6      14.4        9.4  38,599      -20
## 15  0.6825208 142     0.570     71.6      10.0        5.1   3,191        5
## 16  0.8603133  57     0.785     75.6      15.4       10.5  12,488       27
## 17  0.7939778  50     0.798     71.3      15.7       12.0  16,676       14
## 18  0.8010118  21     0.890     80.8      16.3       11.3  41,187        0
## 19  0.5978129 101     0.715     70.0      13.6       10.5   7,614        9
## 20  0.8633461 166     0.480     59.6      11.1        3.3   1,767        0
## 21  0.8639896 132     0.605     69.5      12.6        3.0   7,176      -17
## 22  0.7935723 119     0.662     68.3      13.2        8.2   5,760        4
## 23  0.5951134  85     0.733     76.5      13.6        8.3   9,638       19
## 24  0.8811275 106     0.698     64.5      12.5        8.9  16,646      -41
## 25  0.7351485  75     0.755     74.5      15.2        7.7  15,175       -1
## 26  0.6985392  31     0.856     78.8      14.5        8.8  72,570      -26
## 27  0.8118644  59     0.782     74.2      14.4       10.6  15,596       13
## 28  0.8566667 183     0.402     58.7       7.8        1.4   1,591      -13
## 29  1.0158537 184     0.400     56.7      10.1        2.7     758        1
## 30  0.6152927 122     0.646     73.3      13.5        4.7   6,094       -1
## 31  0.9109827 143     0.555     68.4      10.9        4.4   2,949        7
## 32  0.8307292 153     0.512     55.5      10.4        6.0   2,803       -1
## 33  0.8676056   9     0.913     82.0      15.9       13.0  42,155       11
## 34  0.8531140 187     0.350     50.7       7.2        4.2     581        1
## 35  0.8080808 185     0.392     51.6       7.4        1.9   2,085      -22
## 36  0.6577540  42     0.832     81.7      15.2        9.8  21,290       11
## 37  0.8160920  90     0.727     75.8      13.1        7.5  12,547       -7
## 38  0.7001255  97     0.720     74.0      13.5        7.3  12,040       -9
## 39  0.4394507 159     0.503     63.3      11.5        4.6   1,456       16
## 40  0.9383562 136     0.591     62.3      11.1        6.1   6,012      -14
## 41  0.9658470 176     0.433     58.7       9.8        6.0     680       11
## 42  0.5898734  69     0.766     79.4      13.9        8.4  13,413       10
## 43  0.6437346 172     0.462     51.5       8.9        4.3   3,171      -24
## 44  0.7654110  47     0.818     77.3      14.8       11.0  19,409       11
## 45  0.6200000  67     0.769     79.4      13.8       11.5   7,301       47
## 46  0.7876231  32     0.850     80.2      14.0       11.6  28,633        3
## 47  0.7481698  28     0.870     78.6      16.4       12.3  26,660       10
## 48  0.8840361   4     0.923     80.2      18.7       12.7  44,025       11
## 49  0.5361891 168     0.470     62.0       6.4        3.8   3,276      -22
## 50         NA  94     0.724     77.8      12.7        7.9   9,994        4
## 51  0.6526718 101     0.715     73.5      13.1        7.6  11,883      -12
## 52  0.7884131  NA     0.710     74.0      12.7        7.5  11,449       NA
## 53  0.6614268  88     0.732     75.9      14.2        7.6  10,605        7
## 54  0.3168449 108     0.690     71.1      13.5        6.6  10,512      -12
## 55  0.6050633 116     0.666     73.0      12.3        6.5   7,349       -3
## 56  0.8752711 138     0.587     57.6       9.0        5.5  21,056      -84
## 57  0.8908686 186     0.391     63.7       4.1        3.9   1,130       -6
## 58  0.8156749  30     0.861     76.8      16.5       12.5  25,214       12
## 59  0.8756999 174     0.442     64.1       8.5        2.4   1,428        2
## 60  0.6514286  NA     0.748     72.3      13.6       10.0  12,791       NA
## 61  0.5208333  90     0.727     70.0      15.7        9.9   7,493       21
## 62  0.8703125  24     0.883     80.8      17.1       10.3  38,695        0
## 63  0.8230519  22     0.888     82.2      16.0       11.1  38,056        4
## 64  0.8593272 110     0.684     64.4      12.5        7.8  16,367      -42
## 65  0.8709288 175     0.441     60.2       8.8        2.8   1,507       -2
## 66  0.7523302  76     0.754     74.9      13.8       12.1   7,164       40
## 67  0.8072289   6     0.916     80.9      16.5       13.1  43,919       11
## 68  0.9425770 140     0.579     61.4      11.5        7.0   3,852       -1
## 69  0.7072000  29     0.865     80.9      17.6       10.3  24,524       14
## 70         NA  79     0.750     73.4      15.8        8.6  10,939       14
## 71  0.5589569 128     0.627     71.8      10.7        5.6   6,929      -11
## 72  0.8378033 182     0.411     58.8       8.7        2.4   1,096        0
## 73  0.8687898 178     0.420     55.2       9.0        2.8   1,362       -1
## 74  0.5291925 124     0.636     66.4      10.3        8.5   6,522       -4
## 75  0.8577465 163     0.483     62.8       8.7        4.9   1,669        4
## 76  0.5162847 131     0.606     73.1      11.1        5.5   3,938        7
## 77  0.7566372  12     0.910     84.0      15.6       11.2  53,959       -2
## 78  0.7466667  44     0.828     75.2      15.4       11.6  22,916        3
## 79  0.9108527  16     0.899     82.6      19.0       10.6  35,182       12
## 80  0.3379224 130     0.609     68.0      11.7        5.4   5,497       -4
## 81  0.6104513 110     0.684     68.9      13.0        7.6   9,788       -9
## 82  0.2255435  69     0.766     75.4      15.1        8.2  15,440        4
## 83  0.2134670 121     0.654     69.4      10.1        6.4  14,003      -44
## 84  0.7797357   6     0.916     80.9      18.6       12.2  39,568       16
## 85  0.8379161  18     0.894     82.4      16.0       12.5  30,676       16
## 86  0.6655462  27     0.873     83.1      16.0       10.1  33,030        4
## 87  0.7912553  99     0.719     75.7      12.4        9.7   7,415       13
## 88  0.6931818  20     0.891     83.5      15.3       11.5  36,927        7
## 89  0.2342342  80     0.748     74.0      13.5        9.9  11,365       11
## 90  0.8690629  56     0.788     69.4      15.0       11.4  20,867       -1
## 91  0.8591160 145     0.548     61.6      11.0        6.3   2,762        9
## 92         NA 137     0.590     66.0      12.3        7.8   2,434       21
## 93  0.6948682  17     0.898     81.9      16.9       11.9  33,890       13
## 94  0.5246691  48     0.816     74.4      14.7        7.2  83,961      -46
## 95  0.7044025 120     0.655     70.6      12.5       10.6   3,044       29
## 96  0.9646018 141     0.575     66.2      10.6        5.0   4,680       -6
## 97  0.6729323  NA     0.748     75.0      14.0        8.2  14,242       NA
## 98  0.8121302  46     0.819     74.2      15.2       11.5  22,281        4
## 99  0.3286319  67     0.769     79.3      13.8        7.9  16,509       -1
## 100 0.8027211 161     0.497     49.8      11.1        5.9   3,306      -16
## 101 0.8981481 177     0.430     60.9       9.5        4.1     805        7
## 102 0.3926702  94     0.724     71.6      14.0        7.3  14,911      -19
## 103        NA  13     0.908     80.0      15.0       11.8  79,851      -10
## 104 0.8291233  37     0.839     73.3      16.4       12.4  24,500        7
## 105 0.7848297  19     0.892     81.7      13.9       11.7  58,711      -11
## 106 0.9569061 154     0.510     65.1      10.3        6.0   1,328       24
## 107 1.0380368 173     0.445     62.8      10.8        4.3     747       13
## 108 0.5880795  62     0.779     74.7      12.7       10.0  22,762      -14
## 109 0.7251613 104     0.706     76.8      13.0        5.8  12,328      -19
## 110 0.6240786 179     0.419     58.0       8.4        2.0   1,583       -8
## 111 0.5716440  37     0.839     80.6      14.4       10.3  27,930       -1
## 112 0.3628319 156     0.506     63.1       8.5        3.8   3,560      -14
## 113 0.5876011  63     0.777     74.4      15.6        8.5  17,470        0
## 114 0.5644556  74     0.756     76.8      13.1        8.5  16,056       -4
## 115        NA 123     0.640     69.1      11.7        9.7   3,432       21
## 116 0.8506787 107     0.693     71.6      11.9       11.2   5,223       23
## 117 0.8167388  90     0.727     69.4      14.6        9.3  10,729        4
## 118 0.7504363  49     0.802     76.2      15.2       11.2  14,558       27
## 119 0.3496042 126     0.628     74.0      11.6        4.4   6,850       -8
## 120 1.0326087 180     0.416     55.1       9.3        3.2   1,123        1
## 121 0.9137303 148     0.536     65.9       8.6        4.1   4,608      -12
## 122 0.8587127 126     0.628     64.8      11.3        6.2   9,418      -21
## 123 0.9173364 145     0.548     69.6      12.4        3.3   2,311       16
## 124 0.8286119   5     0.922     81.6      17.9       11.9  45,435        9
## 125 0.8401084   9     0.913     81.8      19.2       12.5  32,689       23
## 126 0.5902864 125     0.631     74.9      11.5        6.0   4,457       12
## 127 0.4459309 188     0.348     61.4       5.4        1.5     908       -5
## 128 0.7566719 152     0.514     52.8       9.0        5.9   5,341      -24
## 129 0.8908297   1     0.944     81.6      17.5       12.6  64,992        5
## 130 0.3510896  52     0.793     76.8      13.6        8.0  34,858      -23
## 131 0.2967431 147     0.538     66.2       7.8        4.7   4,866      -14
## 132        NA  60     0.780     72.7      13.7       12.3  13,496       18
## 133 0.2319277 113     0.677     72.9      13.0        8.9   4,699       21
## 134 0.5990220  60     0.780     77.6      13.3        9.3  18,192        1
## 135 0.9527027 158     0.505     62.6       9.9        4.0   2,463       -1
## 136 0.6568396 112     0.679     72.9      11.9        7.7   7,643       -3
## 137 0.8080569  84     0.734     74.6      13.1        9.0  11,015        8
## 138 0.6411543 115     0.668     68.2      11.3        8.9   7,915       -7
## 139 0.7534669  36     0.843     77.4      15.5       11.8  23,177       10
## 140 0.8293051  43     0.830     80.9      16.3        8.2  25,757       -2
## 141 0.5319372  32     0.850     78.2      13.8        9.1 123,124      -31
## 142 0.7503852  52     0.793     74.7      14.2       10.8  18,108       10
## 143 0.7963738  50     0.798     70.1      14.7       12.0  22,352       -1
## 144 1.0128957 163     0.483     64.2      10.3        3.7   1,458       11
## 145        NA  77     0.752     73.8      12.9        8.4  20,805      -21
## 146 0.8228346  89     0.729     75.1      12.6        9.3   9,765       14
## 147 0.7141026  97     0.720     72.9      13.4        8.6   9,937        3
## 148 0.4023973 105     0.702     73.4      12.9       10.3   5,327       24
## 149 0.5822622 143     0.555     66.5      11.3        4.7   2,918        8
## 150 0.2579821  39     0.837     74.3      16.3        8.7  52,821      -27
## 151 0.7500000 170     0.466     66.5       7.9        2.5   2,188       -8
## 152 0.7307061  66     0.771     74.9      14.4       10.5  12,190       20
## 153        NA  64     0.772     73.1      13.4        9.4  23,300      -19
## 154 0.9521739 181     0.413     50.9       8.6        3.1   1,780      -16
## 155 0.7616580  11     0.912     83.0      15.4       10.6  76,628       -7
## 156 0.7448980  35     0.844     76.3      15.1       12.2  25,845        5
## 157 0.8275316  25     0.880     80.4      16.8       11.9  27,852       12
## 158 0.6759494 156     0.506     67.9       9.2        5.0   1,540       16
## 159 0.7355372 116     0.666     57.4      13.6        9.9  12,122      -29
## 160 0.3711083  NA     0.607     68.4      11.2        5.5   5,605       NA
## 161        NA 169     0.467     55.7       7.6        5.4   2,332       -9
## 162 0.7978723  26     0.876     82.6      17.3        9.6  32,045        7
## 163 0.4600262  73     0.757     74.9      13.7       10.8   9,779       29
## 164 0.8537859  NA     0.518     58.5       9.6        5.2   3,363       NA
## 165 0.4118421 167     0.479     63.5       7.0        3.1   3,809      -27
## 166 0.5886628 103     0.714     71.1      12.7        7.7  15,617      -32
## 167 0.6131285 150     0.531     49.0      11.3        7.1   5,542      -25
## 168 0.8880707  14     0.907     82.2      15.8       12.1  45,636       -1
## 169 0.8251001   3     0.930     83.0      15.8       12.8  56,431        6
## 170 0.1856946 134     0.594     69.6      12.3        6.3   2,728       21
## 171 0.7639429 129     0.624     69.4      11.2       10.4   2,517       27
## 172 0.9767184 151     0.521     65.0       9.2        5.1   2,411        8
## 173 0.7967782  93     0.726     74.4      13.5        7.3  13,323      -13
## 174 0.6385185  81     0.747     75.4      13.4        9.3  11,780        9
## 175 0.4842520 133     0.595     68.2      11.7        4.4   5,363       -6
## 176 0.9913899 162     0.484     59.7      12.2        4.5   1,228       17
## 177 0.7171582 100     0.717     72.8      14.7       10.7   5,069       32
## 178 0.7019868  64     0.772     70.4      12.3       10.9  26,090      -25
## 179 0.3540197  96     0.721     74.8      14.6        6.8  10,404        1
## 180 0.4152542  72     0.761     75.3      14.5        7.6  18,677      -12
## 181 0.6098830 109     0.688     65.6      10.8        9.9  13,066      -28
## 182 0.9570707 163     0.483     58.5       9.8        5.4   1,613        6
## 183 0.7952167  81     0.747     71.0      15.1       11.3   8,178       25
## 184 0.5054348  41     0.835     77.0      13.3        9.5  60,868      -34
## 185 0.8107715  14     0.907     80.7      16.2       13.1  39,267        9
## 186 0.8171263   8     0.915     79.1      16.5       12.9  52,947        3
## 187 0.7239583  52     0.793     77.2      15.5        8.5  19,283        7
## 188 0.6362434 114     0.675     68.4      11.5       10.9   5,567       10
## 189 0.7687500 134     0.594     71.9      10.6        6.8   2,803       19
## 190 0.6452020  71     0.762     74.2      14.2        8.9  16,159       -2
## 191 0.8880779 116     0.666     75.8      11.9        7.5   5,092       15
## 192 0.6558018  NA     0.711     71.5      12.2        7.9  14,301       NA
## 193 0.3518006 160     0.498     63.8       9.2        2.6   3,519      -17
## 194 0.8539720 139     0.586     60.1      13.5        6.6   3,734        2
## 195 0.9275362 155     0.509     57.5      10.9        7.3   1,615       13

#The data includes 195 observations and 19 variables. There are two character variables, four integer variables, and the others are numeric.

#Convert the Gross National Income (GNI) variable to numeric:

library(stringr)
library(dplyr)
str(human$GNI)
##  chr [1:195] "1,885" "9,943" "13,054" "43,978" "6,822" "20,070" ...
# GNI is read as text because it uses "," as a thousands separator.
# Remove every comma before converting: str_replace() only strips the first
# match, so a value with two separators (e.g. "1,234,567") would turn into NA.
human <- mutate(
  human,
  GNI = as.numeric(str_replace_all(GNI, pattern = ",", replacement = ""))
)
str(human$GNI)
##  num [1:195] 1885 9943 13054 43978 6822 ...

#Exclude unneeded variables

names(human)
##  [1] "Country"    "GII"        "gender.GII" "mater.mor"  "adol.birth"
##  [6] "rep.parl"   "sec.edu.F"  "sec.edu.M"  "lab.F"      "lab.M"     
## [11] "edu2.FM"    "lab.FM"     "HDI"        "Human.HDI"  "Life.exp"  
## [16] "Years.exp"  "Years.mean" "GNI"        "GNI.calc"
# Columns retained for the analysis; every other column is dropped.
keep <- c(
  "Country", "edu2.FM", "lab.FM", "Life.exp", "Years.exp",
  "GNI", "mater.mor", "adol.birth", "rep.parl"
)
human <- human %>% dplyr::select(one_of(keep))

#Remove all rows with missing values

# Print, for each row of human, whether it has no missing value in any column.
complete.cases(human)
##   [1]  TRUE  TRUE  TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [12]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [23]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE
##  [34]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [45]  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [56] FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [67]  TRUE  TRUE  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE  TRUE FALSE
##  [78]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [89]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE
## [100]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
## [111]  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [122]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE
## [133] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [144]  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE
## [155]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
## [166]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
## [177]  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [188] FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
# Print every column except the first (Country) next to a "comp" flag that
# marks the rows without missing values.
data.frame(
  human[-1],
  comp = complete.cases(human)
)
##       edu2.FM    lab.FM Life.exp Years.exp    GNI mater.mor adol.birth
## 1   0.1979866 0.1987421     60.4       9.3   1885       400       86.8
## 2   0.9306030 0.6854962     77.8      11.8   9943        21       15.3
## 3   0.8612903 0.2105263     74.8      14.0  13054        89       10.0
## 4   1.0040568        NA     81.3      13.5  43978        NA         NA
## 5          NA 0.8231469     52.3      11.4   6822       460      170.2
## 6          NA        NA     76.1      14.0  20070        NA       49.3
## 7   0.7289916 0.3081009     70.6      12.0  15722       155       45.4
## 8   0.9774306 0.6333333     76.3      17.9  22050        69       54.4
## 9   0.9894737 0.7465565     74.7      12.3   8124        29       27.1
## 10  0.9968288 0.8189415     82.4      20.2  42261         6       12.1
## 11  1.0000000 0.8064993     81.4      15.7  43869         4        4.1
## 12  0.9620123 0.9037356     70.8      11.9  16428        26       40.0
## 13  1.0410959 0.8738966     75.4      12.6  21336        37       28.5
## 14  1.1031128 0.4510932     76.6      14.4  38599        22       13.8
## 15  0.8256659 0.6825208     71.6      10.0   3191       170       80.6
## 16  1.0205245 0.8603133     75.6      15.4  12488        52       48.4
## 17  0.9436009 0.7939778     71.3      15.7  16676         1       20.6
## 18  0.9348613 0.8010118     80.8      16.3  41187         6        6.7
## 19  1.0079156 0.5978129     70.0      13.6   7614        45       71.4
## 20  0.4185185 0.8633461     59.6      11.1   1767       340       90.2
## 21  0.9855072 0.8639896     69.5      12.6   7176       120       40.9
## 22  0.8054146 0.7935723     68.3      13.2   5760       200       71.9
## 23  0.6432665 0.5951134     76.5      13.6   9638         8       15.1
## 24  0.9448010 0.8811275     64.5      12.5  16646       170       44.2
## 25  1.0419847 0.7351485     74.5      15.2  15175        69       70.8
## 26  0.9424779 0.6985392     78.8      14.5  72570        27       23.0
## 27  0.9717868 0.8118644     74.2      14.4  15596         5       35.9
## 28  0.2812500 0.8566667     58.7       7.8   1591       400      115.4
## 29  0.6385542 1.0158537     56.7      10.1    758       740       30.3
## 30         NA 0.6152927     73.3      13.5   6094        53       70.6
## 31  0.4323144 0.9109827     68.4      10.9   2949       170       44.3
## 32  0.6103152 0.8307292     55.5      10.4   2803       590      115.8
## 33  1.0000000 0.8676056     82.0      15.9  42155        11       14.5
## 34  0.3782772 0.8531140     50.7       7.2    581       880       98.3
## 35  0.1717172 0.8080808     51.6       7.4   2085       980      152.0
## 36  0.9594241 0.6577540     81.7      15.2  21290        22       55.3
## 37  0.8164117 0.8160920     75.8      13.1  12547        32        8.6
## 38  1.0233813 0.7001255     74.0      13.5  12040        83       68.5
## 39         NA 0.4394507     63.3      11.5   1456       350       51.1
## 40  0.8446809 0.9383562     62.3      11.1   6012       410      126.7
## 41  0.3950617 0.9658470     58.7       9.8    680       730      135.3
## 42  1.0039604 0.5898734     79.4      13.9  13413        38       60.8
## 43  0.4651163 0.6437346     51.5       8.9   3171       720      130.3
## 44  0.9081197 0.7654110     77.3      14.8  19409        13       12.7
## 45  0.9428934 0.6200000     79.4      13.8   7301        80       43.1
## 46  0.9302326 0.7876231     80.2      14.0  28633        10        5.5
## 47  1.0020060 0.7481698     78.6      16.4  26660         5        4.9
## 48  0.9886128 0.8840361     80.2      18.7  44025         5        5.1
## 49         NA 0.5361891     62.0       6.4   3276       230       18.6
## 50  1.2801724        NA     77.8      12.7   9994        NA         NA
## 51  1.0470810 0.6526718     73.5      13.1  11883       100       99.6
## 52  0.8250377 0.7884131     74.0      12.7  11449        72       21.2
## 53  1.0177665 0.6614268     75.9      14.2  10605        87       77.0
## 54  0.7244224 0.3168449     71.1      13.5  10512        45       43.0
## 55  0.8440367 0.6050633     73.0      12.3   7349        69       76.0
## 56         NA 0.8752711     57.6       9.0  21056       290      112.6
## 57         NA 0.8908686     63.7       4.1   1130       380       65.3
## 58  1.0000000 0.8156749     76.8      16.5  25214        11       16.8
## 59  0.4285714 0.8756999     64.1       8.5   1428       420       78.4
## 60  0.8784119 0.6514286     72.3      13.6  12791        28       30.8
## 61  0.9953488 0.5208333     70.0      15.7   7493        59       42.8
## 62  1.0000000 0.8703125     80.8      17.1  38695         4        9.2
## 63  0.9375000 0.8230519     82.2      16.0  38056        12        5.7
## 64  1.4930748 0.8593272     64.4      12.5  16367       240      103.0
## 65  0.5523810 0.8709288     60.2       8.8   1507       430      115.8
## 66  0.9676375 0.7523302     74.9      13.8   7164        41       46.8
## 67  0.9927835 0.8072289     80.9      16.5  43919         7        3.8
## 68  0.6986090 0.9425770     61.4      11.5   3852       380       58.4
## 69  0.8880597 0.7072000     80.9      17.6  24524         5       11.9
## 70         NA        NA     73.4      15.8  10939        23       35.4
## 71  0.9439655 0.5589569     71.8      10.7   6929       140       97.2
## 72         NA 0.8378033     58.8       8.7   1096       650      131.0
## 73         NA 0.8687898     55.2       9.0   1362       560       99.3
## 74  1.2615063 0.5291925     66.4      10.3   6522       250       88.5
## 75  0.6363636 0.8577465     62.8       8.7   1669       380       42.0
## 76  1.0852713 0.5162847     73.1      11.1   3938       120       84.0
## 77  0.9116162 0.7566372     84.0      15.6  53959        NA        3.3
## 78  0.9918946 0.7466667     75.2      15.4  22916        14       12.1
## 79  0.9934498 0.9108527     82.6      19.0  35182         4       11.5
## 80  0.4770318 0.3379224     68.0      11.7   5497       190       32.8
## 81  0.8109756 0.6104513     68.9      13.0   9788       190       48.3
## 82  0.9201183 0.2255435     75.4      15.1  15440        23       31.6
## 83  0.5537849 0.2134670     69.4      10.1  14003        67       68.7
## 84  1.0241730 0.7797357     80.9      18.6  39568         9        8.2
## 85  0.9667812 0.8379161     82.4      16.0  30676         2        7.8
## 86  0.8844720 0.6655462     83.1      16.0  33030         4        4.0
## 87  1.0541311 0.7912553     75.7      12.4   7415        80       70.1
## 88  1.0139860 0.6931818     83.5      15.3  36927         6        5.4
## 89  0.8853503 0.2342342     74.0      13.5  11365        50       26.5
## 90  0.9645749 0.8690629     69.4      15.0  20867        26       29.9
## 91  0.8057325 0.8591160     61.6      11.0   2762       400       93.6
## 92         NA        NA     66.0      12.3   2434       130       16.6
## 93  0.8641975 0.6948682     81.9      16.9  33890        27        2.2
## 94  0.9875666 0.5246691     74.4      14.7  83961        14       14.5
## 95  0.9762397 0.7044025     70.6      12.5   3044        75       29.3
## 96  0.6189189 0.9646018     66.2      10.6   4680        NA       65.0
## 97  0.9836957 0.6729323     75.0      14.0  14242        85       68.3
## 98  0.9989899 0.8121302     74.2      15.2  22281        13       13.5
## 99  0.9566787 0.3286319     79.3      13.8  16509        16       12.0
## 100 1.1526316 0.8027211     49.8      11.1   3306       490       89.4
## 101 0.3918575 0.8981481     60.9       9.5    805       640      117.4
## 102 1.3245823 0.3926702     71.6      14.0  14911        15        2.5
## 103        NA        NA     80.0      15.0  79851        NA         NA
## 104 0.9448568 0.8291233     73.3      16.4  24500        11       10.6
## 105 1.0000000 0.7848297     81.7      13.9  58711        11        8.3
## 106        NA 0.9569061     65.1      10.3   1328       440      122.8
## 107 0.5138889 1.0380368     62.8      10.8    747       510      144.8
## 108 0.9130435 0.5880795     74.7      12.7  22762        29        5.7
## 109 0.8348624 0.7251613     76.8      13.0  12328        31        4.2
## 110 0.5099338 0.6240786     58.0       8.4   1583       550      175.6
## 111 0.8772379 0.5716440     80.6      14.4  27930         9       18.2
## 112 0.3971292 0.3628319     63.1       8.5   3560       320       73.3
## 113 0.8517241 0.5876011     74.4      15.6  17470        73       30.9
## 114 0.9191419 0.5644556     76.8      13.1  16056        49       63.4
## 115        NA        NA     69.1      11.7   3432        96       18.6
## 116 0.9689441 0.8506787     71.6      11.9   5223        21       29.3
## 117 1.0142687 0.8167388     69.4      14.6  10729        68       18.7
## 118 0.8891235 0.7504363     76.2      15.2  14558         7       15.2
## 119 0.6854305 0.3496042     74.0      11.6   6850       120       35.8
## 120 0.2258065 1.0326087     55.1       9.3   1123       480      137.8
## 121 1.4967320 0.9137303     65.9       8.6   4608       200       12.1
## 122 0.9680233 0.8587127     64.8      11.3   9418       130       54.9
## 123 0.4633508 0.9173364     69.6      12.4   2311       190       73.7
## 124 0.9690608 0.8286119     81.6      17.9  45435         6        6.2
## 125 0.9968520 0.8401084     81.8      19.2  32689         8       25.3
## 126 1.0287206 0.5902864     74.9      11.5   4457       100      100.8
## 127 0.3076923 0.4459309     61.4       5.4    908       630      204.8
## 128        NA 0.7566719     52.8       9.0   5341       560      119.6
## 129 1.0072389 0.8908297     81.6      17.5  64992         4        7.8
## 130 0.8266200 0.3510896     76.8      13.6  34858        11       10.6
## 131 0.4186551 0.2967431     66.2       7.8   4866       170       27.3
## 132        NA        NA     72.7      13.7  13496        NA         NA
## 133 0.9074074 0.2319277     72.9      13.0   4699        NA       45.8
## 134 1.0821643 0.5990220     77.6      13.3  18192        85       78.5
## 135 0.5241379 0.9527027     62.6       9.9   2463       220       62.1
## 136 0.8558140 0.6568396     72.9      11.9   7643       110       67.0
## 137 0.8517398 0.8080569     74.6      13.1  11015        89       50.7
## 138 1.0345369 0.6411543     68.2      11.3   7915       120       46.8
## 139 0.9286550 0.7534669     77.4      15.5  23177         3       12.2
## 140 0.9896266 0.8293051     80.9      16.3  25757         8       12.6
## 141 1.1305085 0.5319372     78.2      13.8 123124         6        9.5
## 142 0.9358696 0.7503852     74.7      14.2  18108        33       31.0
## 143 0.9686486 0.7963738     70.1      14.7  22352        24       25.7
## 144 0.9090909 1.0128957     64.2      10.3   1458       320       33.6
## 145        NA        NA     73.8      12.9  20805        NA         NA
## 146        NA 0.8228346     75.1      12.6   9765        34       56.3
## 147        NA 0.7141026     72.9      13.4   9937        45       54.5
## 148 1.0716667 0.4023973     73.4      12.9   5327        58       28.3
## 149        NA 0.5822622     66.5      11.3   2918       210       65.1
## 150 0.8605974 0.2579821     74.3      16.3  52821        16       10.2
## 151 0.4675325 0.7500000     66.5       7.9   2188       320       94.4
## 152 0.7934783 0.7307061     74.9      14.4  12190        16       16.9
## 153 1.0045045        NA     73.1      13.4  23300        NA       56.3
## 154 0.4608295 0.9521739     50.9       8.6   1780      1100      100.7
## 155 0.9148148 0.7616580     83.0      15.4  76628         6        6.0
## 156 0.9959799 0.7448980     76.3      15.1  25845         7       15.9
## 157 0.9775510 0.8275316     80.4      16.8  27852         7        0.6
## 158        NA 0.6759494     67.9       9.2   1540       130       64.9
## 159 0.9578393 0.7355372     57.4      13.6  12122       140       50.9
## 160 0.5329670 0.3711083     68.4      11.2   5605       183       38.7
## 161        NA        NA     55.7       7.6   2332       730       75.3
## 162 0.9138167 0.7978723     82.6      17.3  32045         4       10.6
## 163 0.9515707 0.4600262     74.9      13.7   9779        29       16.9
## 164 0.7015873 0.8537859     58.5       9.6   3363       506      109.7
## 165 0.6648352 0.4118421     63.5       7.0   3809       360       84.0
## 166 0.9469214 0.5886628     71.1      12.7  15617       130       35.2
## 167 0.8423077 0.6131285     49.0      11.3   5542       310       72.0
## 168 0.9908362 0.8880707     82.2      15.8  45636         4        6.5
## 169 0.9834369 0.8251001     83.0      15.8  56431         6        1.9
## 170 0.7283951 0.1856946     69.6      12.3   2728        49       41.6
## 171 1.0427632 0.7639429     69.4      11.2   2517        44       42.8
## 172 0.5894737 0.9767184     65.0       9.2   2411       410      122.7
## 173 0.8750000 0.7967782     74.4      13.5  13323        26       41.0
## 174 0.7230216 0.6385185     75.4      13.4  11780         7       18.3
## 175        NA 0.4842520     68.2      11.7   5363       270       52.2
## 176 0.3995037 0.9913899     59.7      12.2   1228       450       91.5
## 177 0.9909400 0.7171582     72.8      14.7   5069       120       18.1
## 178 0.9802956 0.7019868     70.4      12.3  26090        84       34.8
## 179 0.7114967 0.3540197     74.8      14.6  10404        46        4.6
## 180 0.6500000 0.4152542     75.3      14.5  18677        20       30.9
## 181        NA 0.6098830     65.6      10.8  13066        61       18.0
## 182 0.6835821 0.9570707     58.5       9.8   1613       360      126.6
## 183 0.9562044 0.7952167     71.0      15.1   8178        23       25.7
## 184 1.1944444 0.5054348     77.0      13.3  60868         8       27.6
## 185 0.9989990 0.8107715     80.7      16.2  39267         8       25.8
## 186 1.0031646 0.8171263     79.1      16.5  52947        28       31.0
## 187 1.0815109 0.7239583     77.2      15.5  19283        14       58.3
## 188        NA 0.6362434     68.4      11.5   5567        36       38.8
## 189        NA 0.7687500     71.9      10.6   2803        86       44.8
## 190 1.1141732 0.6452020     74.2      14.2  16159       110       83.2
## 191 0.8342697 0.8880779     75.8      11.9   5092        49       29.0
## 192 0.8333333 0.6558018     71.5      12.2  14301       210       47.4
## 193 0.3220974 0.3518006     63.8       9.2   3519       270       47.0
## 194 0.5863636 0.8539720     60.1      13.5   3734       280      125.4
## 195 0.7854839 0.9275362     57.5      10.9   1615       470       60.3
##     rep.parl  comp
## 1       27.6  TRUE
## 2       20.7  TRUE
## 3       25.7  TRUE
## 4       50.0 FALSE
## 5       36.8 FALSE
## 6       25.7 FALSE
## 7       14.0  TRUE
## 8       36.8  TRUE
## 9       10.7  TRUE
## 10      30.5  TRUE
## 11      30.3  TRUE
## 12      15.6  TRUE
## 13      16.7  TRUE
## 14      15.0  TRUE
## 15      20.0  TRUE
## 16      19.6  TRUE
## 17      30.1  TRUE
## 18      42.4  TRUE
## 19      13.3  TRUE
## 20       8.4  TRUE
## 21       8.3  TRUE
## 22      51.8  TRUE
## 23      19.3  TRUE
## 24       9.5  TRUE
## 25       9.6  TRUE
## 26        NA FALSE
## 27      20.4  TRUE
## 28      13.3  TRUE
## 29      34.9  TRUE
## 30      20.8 FALSE
## 31      19.0  TRUE
## 32      27.1  TRUE
## 33      28.2  TRUE
## 34      12.5  TRUE
## 35      14.9  TRUE
## 36      15.8  TRUE
## 37      23.6  TRUE
## 38      20.9  TRUE
## 39       3.0 FALSE
## 40      11.5  TRUE
## 41       8.2  TRUE
## 42      33.3  TRUE
## 43       9.2  TRUE
## 44      25.8  TRUE
## 45      48.9  TRUE
## 46      12.5  TRUE
## 47      18.9  TRUE
## 48      38.0  TRUE
## 49      12.7 FALSE
## 50      21.9 FALSE
## 51      19.1  TRUE
## 52      18.7  TRUE
## 53      41.6  TRUE
## 54       2.2  TRUE
## 55      27.4  TRUE
## 56      19.7 FALSE
## 57      22.0 FALSE
## 58      19.8  TRUE
## 59      25.5  TRUE
## 60      19.0  TRUE
## 61      14.0  TRUE
## 62      42.5  TRUE
## 63      25.7  TRUE
## 64      16.2  TRUE
## 65       9.4  TRUE
## 66      11.3  TRUE
## 67      36.9  TRUE
## 68      10.9  TRUE
## 69      21.0  TRUE
## 70      25.0 FALSE
## 71      13.3  TRUE
## 72      21.9 FALSE
## 73      13.7 FALSE
## 74      31.3  TRUE
## 75       3.5  TRUE
## 76      25.8  TRUE
## 77        NA FALSE
## 78      10.1  TRUE
## 79      41.3  TRUE
## 80      12.2  TRUE
## 81      17.1  TRUE
## 82       3.1  TRUE
## 83      26.5  TRUE
## 84      19.9  TRUE
## 85      22.5  TRUE
## 86      30.1  TRUE
## 87      16.7  TRUE
## 88      11.6  TRUE
## 89      11.6  TRUE
## 90      20.1  TRUE
## 91      20.8  TRUE
## 92       8.7 FALSE
## 93      16.3  TRUE
## 94       1.5  TRUE
## 95      23.3  TRUE
## 96      25.0 FALSE
## 97      27.0  TRUE
## 98      18.0  TRUE
## 99       3.1  TRUE
## 100     26.8  TRUE
## 101     10.7  TRUE
## 102     16.0  TRUE
## 103     20.0 FALSE
## 104     23.4  TRUE
## 105     28.3  TRUE
## 106     20.5 FALSE
## 107     16.7  TRUE
## 108     14.2  TRUE
## 109      5.9  TRUE
## 110      9.5  TRUE
## 111     13.0  TRUE
## 112     22.2  TRUE
## 113     11.6  TRUE
## 114     37.1  TRUE
## 115      0.0 FALSE
## 116     20.8  TRUE
## 117     14.9  TRUE
## 118     17.3  TRUE
## 119     11.0  TRUE
## 120     39.6  TRUE
## 121      4.7  TRUE
## 122     37.7  TRUE
## 123     29.5  TRUE
## 124     36.9  TRUE
## 125     31.4  TRUE
## 126     39.1  TRUE
## 127     13.3  TRUE
## 128      6.6 FALSE
## 129     39.6  TRUE
## 130      9.6  TRUE
## 131     19.7  TRUE
## 132     10.3 FALSE
## 133       NA FALSE
## 134     19.3  TRUE
## 135      2.7  TRUE
## 136     16.8  TRUE
## 137     22.3  TRUE
## 138     27.1  TRUE
## 139     22.1  TRUE
## 140     31.3  TRUE
## 141      0.0  TRUE
## 142     12.0  TRUE
## 143     14.5  TRUE
## 144     57.5  TRUE
## 145      6.7 FALSE
## 146     20.7 FALSE
## 147     13.0 FALSE
## 148      6.1  TRUE
## 149     18.2 FALSE
## 150     19.9  TRUE
## 151     42.7  TRUE
## 152     34.0  TRUE
## 153     43.8 FALSE
## 154     12.4  TRUE
## 155     25.3  TRUE
## 156     18.7  TRUE
## 157     27.7  TRUE
## 158      2.0 FALSE
## 159     40.7  TRUE
## 160     17.5  TRUE
## 161     24.3 FALSE
## 162     38.0  TRUE
## 163      5.8  TRUE
## 164     22.5  TRUE
## 165     23.8  TRUE
## 166     11.8  TRUE
## 167     14.7  TRUE
## 168     43.6  TRUE
## 169     28.5  TRUE
## 170     12.4  TRUE
## 171     15.2  TRUE
## 172     36.0  TRUE
## 173      6.1  TRUE
## 174     33.3  TRUE
## 175     38.5 FALSE
## 176     17.6  TRUE
## 177      0.0  TRUE
## 178     24.7  TRUE
## 179     31.3  TRUE
## 180     14.4  TRUE
## 181     25.8 FALSE
## 182     35.0  TRUE
## 183     11.8  TRUE
## 184     17.5  TRUE
## 185     23.5  TRUE
## 186     19.4  TRUE
## 187     11.5  TRUE
## 188     16.4 FALSE
## 189      0.0 FALSE
## 190     17.0  TRUE
## 191     24.3  TRUE
## 192     21.8  TRUE
## 193      0.7  TRUE
## 194     12.7  TRUE
## 195     35.1  TRUE
# Keep only the rows without any missing value, then print the check again to
# confirm that no incomplete row remains.
human_ <- human %>% filter(complete.cases(.))
complete.cases(human_)
##   [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [15] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [29] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [43] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [57] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [71] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [85] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [99] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [113] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [127] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [141] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [155] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE

#Remove the observations which relate to regions instead of countries

dim(human_)
## [1] 162   9
tail(human_, n=10)
##                                Country   edu2.FM    lab.FM Life.exp
## 153               United Arab Emirates 1.1944444 0.5054348     77.0
## 154                     United Kingdom 0.9989990 0.8107715     80.7
## 155                      United States 1.0031646 0.8171263     79.1
## 156                            Uruguay 1.0815109 0.7239583     77.2
## 157 Venezuela (Bolivarian Republic of) 1.1141732 0.6452020     74.2
## 158                           Viet Nam 0.8342697 0.8880779     75.8
## 159                              World 0.8333333 0.6558018     71.5
## 160                              Yemen 0.3220974 0.3518006     63.8
## 161                             Zambia 0.5863636 0.8539720     60.1
## 162                           Zimbabwe 0.7854839 0.9275362     57.5
##     Years.exp   GNI mater.mor adol.birth rep.parl
## 153      13.3 60868         8       27.6     17.5
## 154      16.2 39267         8       25.8     23.5
## 155      16.5 52947        28       31.0     19.4
## 156      15.5 19283        14       58.3     11.5
## 157      14.2 16159       110       83.2     17.0
## 158      11.9  5092        49       29.0     24.3
## 159      12.2 14301       210       47.4     21.8
## 160       9.2  3519       270       47.0      0.7
## 161      13.5  3734       280      125.4     12.7
## 162      10.9  1615       470       60.3     35.1
# Drop the regional aggregates by name rather than by position.
# The tail() output above shows that the rows are ordered alphabetically by
# Country: the last seven rows are Uruguay, Venezuela, Viet Nam, World, Yemen,
# Zambia and Zimbabwe. Cutting off the last seven rows would therefore discard
# six real countries and leave the other aggregates in the data.
# NOTE(review): region labels follow the UNDP HDR 2015 tables -- confirm the
# exact spelling against human_$Country; dim() below should report 155 rows.
# Printed output further down was produced before this change; re-render it.
regions <- c(
  "Arab States", "East Asia and the Pacific", "Europe and Central Asia",
  "Latin America and the Caribbean", "South Asia", "Sub-Saharan Africa",
  "World"
)
human_ <- human_[!(human_$Country %in% regions), ]
dim(human_)
## [1] 155   9

#Define the row names of the data and remove the country name column from the data.

# Label each row with its country name, then drop the now-redundant column.
row.names(human_) <- human_$Country
human_ <- human_ %>% dplyr::select(-Country)
dim(human_)
## [1] 155   8
str(human_)
## 'data.frame':    155 obs. of  8 variables:
##  $ edu2.FM   : num  0.198 0.931 0.861 0.729 0.977 ...
##  $ lab.FM    : num  0.199 0.685 0.211 0.308 0.633 ...
##  $ Life.exp  : num  60.4 77.8 74.8 70.6 76.3 74.7 82.4 81.4 70.8 75.4 ...
##  $ Years.exp : num  9.3 11.8 14 12 17.9 12.3 20.2 15.7 11.9 12.6 ...
##  $ GNI       : num  1885 9943 13054 15722 22050 ...
##  $ mater.mor : int  400 21 89 155 69 29 6 4 26 37 ...
##  $ adol.birth: num  86.8 15.3 10 45.4 54.4 27.1 12.1 4.1 40 28.5 ...
##  $ rep.parl  : num  27.6 20.7 25.7 14 36.8 10.7 30.5 30.3 15.6 16.7 ...
# Save the cleaned data. Row names (the countries) are written as the first
# column; this is write.csv()'s default, spelled out here for clarity.
write.csv(
  human_,
  file = "~/IODS-project/data/human_",
  row.names = TRUE
)

#Graphical overview and summaries of the variables

library(GGally); library(ggplot2); library(corrplot)
# Pairwise plot matrix of all variables.
ggpairs(human_)

# Correlation matrix of the variables, drawn with corrplot.
corrplot(cor(human_))

#The strongest correlation is between the maternal mortality ratio and life expectancy at birth, and the correlation is negative.

#PCA on the non-standardized data

# Principal component analysis on the variables as they are (no scaling),
# followed by a biplot of the first two components.
pca_human <- prcomp(human_)
biplot(
  pca_human,
  choices = 1:2,
  cex = c(0.8, 1),
  col = c("grey40", "deeppink2")
)
## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

#Standardize the variables in the human data and repeat the same analysis as above

# Centre every variable and scale it to unit variance (scale() defaults),
# then repeat the PCA and the biplot on the standardized data.
human_std <- scale(x = human_)
pca_human2 <- prcomp(x = human_std)
biplot(
  pca_human2,
  choices = c(1L, 2L),
  cex = c(0.8, 1),
  col = c("grey40", "deeppink2")
)

# Packages for the tea analysis: plotting, MCA and data wrangling.
library(ggplot2)
library(FactoMineR)
library(dplyr)
library(tidyr)
# Load the tea data set (presumably the one shipped with FactoMineR) into the
# workspace and inspect its structure.
data("tea")
str(tea)
## 'data.frame':    300 obs. of  36 variables:
##  $ breakfast       : Factor w/ 2 levels "breakfast","Not.breakfast": 1 1 2 2 1 2 1 2 1 1 ...
##  $ tea.time        : Factor w/ 2 levels "Not.tea time",..: 1 1 2 1 1 1 2 2 2 1 ...
##  $ evening         : Factor w/ 2 levels "evening","Not.evening": 2 2 1 2 1 2 2 1 2 1 ...
##  $ lunch           : Factor w/ 2 levels "lunch","Not.lunch": 2 2 2 2 2 2 2 2 2 2 ...
##  $ dinner          : Factor w/ 2 levels "dinner","Not.dinner": 2 2 1 1 2 1 2 2 2 2 ...
##  $ always          : Factor w/ 2 levels "always","Not.always": 2 2 2 2 1 2 2 2 2 2 ...
##  $ home            : Factor w/ 2 levels "home","Not.home": 1 1 1 1 1 1 1 1 1 1 ...
##  $ work            : Factor w/ 2 levels "Not.work","work": 1 1 2 1 1 1 1 1 1 1 ...
##  $ tearoom         : Factor w/ 2 levels "Not.tearoom",..: 1 1 1 1 1 1 1 1 1 2 ...
##  $ friends         : Factor w/ 2 levels "friends","Not.friends": 2 2 1 2 2 2 1 2 2 2 ...
##  $ resto           : Factor w/ 2 levels "Not.resto","resto": 1 1 2 1 1 1 1 1 1 1 ...
##  $ pub             : Factor w/ 2 levels "Not.pub","pub": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Tea             : Factor w/ 3 levels "black","Earl Grey",..: 1 1 2 2 2 2 2 1 2 1 ...
##  $ How             : Factor w/ 4 levels "alone","lemon",..: 1 3 1 1 1 1 1 3 3 1 ...
##  $ sugar           : Factor w/ 2 levels "No.sugar","sugar": 2 1 1 2 1 1 1 1 1 1 ...
##  $ how             : Factor w/ 3 levels "tea bag","tea bag+unpackaged",..: 1 1 1 1 1 1 1 1 2 2 ...
##  $ where           : Factor w/ 3 levels "chain store",..: 1 1 1 1 1 1 1 1 2 2 ...
##  $ price           : Factor w/ 6 levels "p_branded","p_cheap",..: 4 6 6 6 6 3 6 6 5 5 ...
##  $ age             : int  39 45 47 23 48 21 37 36 40 37 ...
##  $ sex             : Factor w/ 2 levels "F","M": 2 1 1 2 2 2 2 1 2 2 ...
##  $ SPC             : Factor w/ 7 levels "employee","middle",..: 2 2 4 6 1 6 5 2 5 5 ...
##  $ Sport           : Factor w/ 2 levels "Not.sportsman",..: 2 2 2 1 2 2 2 2 2 1 ...
##  $ age_Q           : Factor w/ 5 levels "15-24","25-34",..: 3 4 4 1 4 1 3 3 3 3 ...
##  $ frequency       : Factor w/ 4 levels "1/day","1 to 2/week",..: 1 1 3 1 3 1 4 2 3 3 ...
##  $ escape.exoticism: Factor w/ 2 levels "escape-exoticism",..: 2 1 2 1 1 2 2 2 2 2 ...
##  $ spirituality    : Factor w/ 2 levels "Not.spirituality",..: 1 1 1 2 2 1 1 1 1 1 ...
##  $ healthy         : Factor w/ 2 levels "healthy","Not.healthy": 1 1 1 1 2 1 1 1 2 1 ...
##  $ diuretic        : Factor w/ 2 levels "diuretic","Not.diuretic": 2 1 1 2 1 2 2 2 2 1 ...
##  $ friendliness    : Factor w/ 2 levels "friendliness",..: 2 2 1 2 1 2 2 1 2 1 ...
##  $ iron.absorption : Factor w/ 2 levels "iron absorption",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ feminine        : Factor w/ 2 levels "feminine","Not.feminine": 2 2 2 2 2 2 2 1 2 2 ...
##  $ sophisticated   : Factor w/ 2 levels "Not.sophisticated",..: 1 1 1 2 1 1 1 2 2 1 ...
##  $ slimming        : Factor w/ 2 levels "No.slimming",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ exciting        : Factor w/ 2 levels "exciting","No.exciting": 2 1 2 2 2 2 2 2 2 2 ...
##  $ relaxing        : Factor w/ 2 levels "No.relaxing",..: 1 1 2 2 2 2 2 2 2 2 ...
##  $ effect.on.health: Factor w/ 2 levels "effect on health",..: 2 2 2 2 2 2 2 2 2 2 ...
dim(tea)
## [1] 300  36
# Restrict the data to the six variables analysed in the MCA further down.
keep_columns <- c("Tea", "How", "how", "sugar", "where", "lunch")
tea_time <- tea %>% dplyr::select(one_of(keep_columns))
# Level counts for each retained factor.
summary(tea_time)
##         Tea         How                      how           sugar    
##  black    : 74   alone:195   tea bag           :170   No.sugar:155  
##  Earl Grey:193   lemon: 33   tea bag+unpackaged: 94   sugar   :145  
##  green    : 33   milk : 63   unpackaged        : 36                 
##                  other:  9                                          
##                   where           lunch    
##  chain store         :192   lunch    : 44  
##  chain store+tea shop: 78   Not.lunch:256  
##  tea shop            : 30                  
## 
str(tea_time)
## 'data.frame':    300 obs. of  6 variables:
##  $ Tea  : Factor w/ 3 levels "black","Earl Grey",..: 1 1 2 2 2 2 2 1 2 1 ...
##  $ How  : Factor w/ 4 levels "alone","lemon",..: 1 3 1 1 1 1 1 3 3 1 ...
##  $ how  : Factor w/ 3 levels "tea bag","tea bag+unpackaged",..: 1 1 1 1 1 1 1 1 2 2 ...
##  $ sugar: Factor w/ 2 levels "No.sugar","sugar": 2 1 1 2 1 1 1 1 1 1 ...
##  $ where: Factor w/ 3 levels "chain store",..: 1 1 1 1 1 1 1 1 2 2 ...
##  $ lunch: Factor w/ 2 levels "lunch","Not.lunch": 2 2 2 2 2 2 2 2 2 2 ...

#Visualizing the dataset and multiple correspondence analysis

# Reshape to long format (one row per observation/variable pair) and draw one
# bar chart per variable. pivot_longer() replaces the superseded gather(): it
# combines the factor columns without the "attributes are not identical across
# measure variables" warning. The values are then converted to character,
# which is the type gather() produced, so the plotted data is unchanged.
tea_time %>%
  pivot_longer(cols = everything(), names_to = "key", values_to = "value") %>%
  mutate(value = as.character(value)) %>%
  ggplot(aes(value)) +
  facet_wrap("key", scales = "free") +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8))

# Multiple correspondence analysis on the selected tea variables;
# graph = FALSE so that MCA() itself draws nothing.
maca <- MCA(tea_time, graph = FALSE)
# Plot the result with the individuals ("ind") hidden;
# habillage = "quali" presumably colours the categories by variable -- confirm.
plot(maca, invisible = "ind", habillage = "quali")